mirror of
https://git.ryujinx.app/ryubing/ryujinx.git
synced 2025-07-23 17:27:12 +02:00
Implement UQADD16, UQADD8, UQSUB16, UQSUB8, VQRDMULH, VSLI and VSWP Arm32 instructions (#7174)
This commit is contained in:
parent
7969fb6bba
commit
8d8983049e
12 changed files with 445 additions and 13 deletions
|
@ -2,6 +2,8 @@ using ARMeilleure.Decoders;
|
|||
using ARMeilleure.IntermediateRepresentation;
|
||||
using ARMeilleure.State;
|
||||
using ARMeilleure.Translation;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using static ARMeilleure.Instructions.InstEmitAluHelper;
|
||||
using static ARMeilleure.Instructions.InstEmitHelper;
|
||||
|
@ -558,6 +560,46 @@ namespace ARMeilleure.Instructions
|
|||
EmitHsub8(context, unsigned: true);
|
||||
}
|
||||
|
||||
public static void Uqadd16(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
|
||||
|
||||
SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
|
||||
{
|
||||
EmitSaturateUqadd(context, d, context.Add(n, m), 16);
|
||||
}));
|
||||
}
|
||||
|
||||
public static void Uqadd8(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
|
||||
|
||||
SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
|
||||
{
|
||||
EmitSaturateUqadd(context, d, context.Add(n, m), 8);
|
||||
}));
|
||||
}
|
||||
|
||||
public static void Uqsub16(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
|
||||
|
||||
SetIntA32(context, op.Rd, EmitUnsigned16BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
|
||||
{
|
||||
EmitSaturateUqsub(context, d, context.Subtract(n, m), 16);
|
||||
}));
|
||||
}
|
||||
|
||||
public static void Uqsub8(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32AluReg op = (OpCode32AluReg)context.CurrOp;
|
||||
|
||||
SetIntA32(context, op.Rd, EmitUnsigned8BitPair(context, GetIntA32(context, op.Rn), GetIntA32(context, op.Rm), (d, n, m) =>
|
||||
{
|
||||
EmitSaturateUqsub(context, d, context.Subtract(n, m), 8);
|
||||
}));
|
||||
}
|
||||
|
||||
public static void Usat(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32Sat op = (OpCode32Sat)context.CurrOp;
|
||||
|
@ -934,6 +976,148 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
private static void EmitSaturateUqadd(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
|
||||
{
|
||||
Debug.Assert(saturateTo <= 32);
|
||||
|
||||
if (saturateTo == 32)
|
||||
{
|
||||
// No saturation possible for this case.
|
||||
|
||||
context.Copy(result, value);
|
||||
|
||||
return;
|
||||
}
|
||||
else if (saturateTo == 0)
|
||||
{
|
||||
// Result is always zero if we saturate 0 bits.
|
||||
|
||||
context.Copy(result, Const(0));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// If the result is 0, the values are equal and we don't need saturation.
|
||||
Operand lblNoSat = Label();
|
||||
context.BranchIfFalse(lblNoSat, context.ShiftRightUI(value, Const((int)saturateTo)));
|
||||
|
||||
// Saturate.
|
||||
context.Copy(result, Const(uint.MaxValue >> (32 - (int)saturateTo)));
|
||||
|
||||
Operand lblExit = Label();
|
||||
context.Branch(lblExit);
|
||||
|
||||
context.MarkLabel(lblNoSat);
|
||||
|
||||
context.Copy(result, value);
|
||||
|
||||
context.MarkLabel(lblExit);
|
||||
}
|
||||
|
||||
private static void EmitSaturateUqsub(ArmEmitterContext context, Operand result, Operand value, uint saturateTo)
|
||||
{
|
||||
Debug.Assert(saturateTo <= 32);
|
||||
|
||||
if (saturateTo == 32)
|
||||
{
|
||||
// No saturation possible for this case.
|
||||
|
||||
context.Copy(result, value);
|
||||
|
||||
return;
|
||||
}
|
||||
else if (saturateTo == 0)
|
||||
{
|
||||
// Result is always zero if we saturate 0 bits.
|
||||
|
||||
context.Copy(result, Const(0));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// If the result is 0, the values are equal and we don't need saturation.
|
||||
Operand lblNoSat = Label();
|
||||
context.BranchIf(lblNoSat, value, Const(0), Comparison.GreaterOrEqual);
|
||||
|
||||
// Saturate.
|
||||
// Assumes that the value can only underflow, since this is only used for unsigned subtraction.
|
||||
context.Copy(result, Const(0));
|
||||
|
||||
Operand lblExit = Label();
|
||||
context.Branch(lblExit);
|
||||
|
||||
context.MarkLabel(lblNoSat);
|
||||
|
||||
context.Copy(result, value);
|
||||
|
||||
context.MarkLabel(lblExit);
|
||||
}
|
||||
|
||||
private static Operand EmitUnsigned16BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
|
||||
{
|
||||
Operand tempD = context.AllocateLocal(OperandType.I32);
|
||||
|
||||
Operand tempN = context.ZeroExtend16(OperandType.I32, rn);
|
||||
Operand tempM = context.ZeroExtend16(OperandType.I32, rm);
|
||||
elementAction(tempD, tempN, tempM);
|
||||
Operand tempD2 = context.ZeroExtend16(OperandType.I32, tempD);
|
||||
|
||||
tempN = context.ShiftRightUI(rn, Const(16));
|
||||
tempM = context.ShiftRightUI(rm, Const(16));
|
||||
elementAction(tempD, tempN, tempM);
|
||||
return context.BitwiseOr(tempD2, context.ShiftLeft(tempD, Const(16)));
|
||||
}
|
||||
|
||||
private static Operand EmitSigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
|
||||
{
|
||||
return Emit8BitPair(context, rn, rm, elementAction, unsigned: false);
|
||||
}
|
||||
|
||||
private static Operand EmitUnsigned8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction)
|
||||
{
|
||||
return Emit8BitPair(context, rn, rm, elementAction, unsigned: true);
|
||||
}
|
||||
|
||||
private static Operand Emit8BitPair(ArmEmitterContext context, Operand rn, Operand rm, Action<Operand, Operand, Operand> elementAction, bool unsigned)
|
||||
{
|
||||
Operand tempD = context.AllocateLocal(OperandType.I32);
|
||||
Operand result = default;
|
||||
|
||||
for (int b = 0; b < 4; b++)
|
||||
{
|
||||
Operand nByte = b != 0 ? context.ShiftRightUI(rn, Const(b * 8)) : rn;
|
||||
Operand mByte = b != 0 ? context.ShiftRightUI(rm, Const(b * 8)) : rm;
|
||||
|
||||
if (unsigned)
|
||||
{
|
||||
nByte = context.ZeroExtend8(OperandType.I32, nByte);
|
||||
mByte = context.ZeroExtend8(OperandType.I32, mByte);
|
||||
}
|
||||
else
|
||||
{
|
||||
nByte = context.SignExtend8(OperandType.I32, nByte);
|
||||
mByte = context.SignExtend8(OperandType.I32, mByte);
|
||||
}
|
||||
|
||||
elementAction(tempD, nByte, mByte);
|
||||
|
||||
if (b == 0)
|
||||
{
|
||||
result = context.ZeroExtend8(OperandType.I32, tempD);
|
||||
}
|
||||
else if (b < 3)
|
||||
{
|
||||
result = context.BitwiseOr(result, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, tempD), Const(b * 8)));
|
||||
}
|
||||
else
|
||||
{
|
||||
result = context.BitwiseOr(result, context.ShiftLeft(tempD, Const(24)));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static void EmitAluStore(ArmEmitterContext context, Operand value)
|
||||
{
|
||||
IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
|
||||
|
|
|
@ -1246,6 +1246,33 @@ namespace ARMeilleure.Instructions
|
|||
EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true);
|
||||
}
|
||||
|
||||
public static void Vqrdmulh(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
int eSize = 8 << op.Size;
|
||||
|
||||
EmitVectorBinaryOpI32(context, (op1, op2) =>
|
||||
{
|
||||
if (op.Size == 2)
|
||||
{
|
||||
op1 = context.SignExtend32(OperandType.I64, op1);
|
||||
op2 = context.SignExtend32(OperandType.I64, op2);
|
||||
}
|
||||
|
||||
Operand res = context.Multiply(op1, op2);
|
||||
res = context.Add(res, Const(res.Type, 1L << (eSize - 2)));
|
||||
res = context.ShiftRightSI(res, Const(eSize - 1));
|
||||
res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true);
|
||||
|
||||
if (op.Size == 2)
|
||||
{
|
||||
res = context.ConvertI64ToI32(res);
|
||||
}
|
||||
|
||||
return res;
|
||||
}, signed: true);
|
||||
}
|
||||
|
||||
public static void Vqsub(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||
|
|
|
@ -191,6 +191,26 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVecA32(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void Vswp(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
|
||||
|
||||
if (op.Q)
|
||||
{
|
||||
Operand temp = context.Copy(GetVecA32(op.Qd));
|
||||
|
||||
context.Copy(GetVecA32(op.Qd), GetVecA32(op.Qm));
|
||||
context.Copy(GetVecA32(op.Qm), temp);
|
||||
}
|
||||
else
|
||||
{
|
||||
Operand temp = ExtractScalar(context, OperandType.I64, op.Vd);
|
||||
|
||||
InsertScalar(context, op.Vd, ExtractScalar(context, OperandType.I64, op.Vm));
|
||||
InsertScalar(context, op.Vm, temp);
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vtbl(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
|
||||
|
|
|
@ -130,6 +130,36 @@ namespace ARMeilleure.Instructions
|
|||
EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
|
||||
}
|
||||
|
||||
public static void Vsli_I(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
|
||||
int shift = op.Shift;
|
||||
int eSize = 8 << op.Size;
|
||||
|
||||
ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL;
|
||||
|
||||
Operand res = GetVec(op.Qd);
|
||||
|
||||
int elems = op.GetBytesCount() >> op.Size;
|
||||
|
||||
for (int index = 0; index < elems; index++)
|
||||
{
|
||||
Operand me = EmitVectorExtractZx(context, op.Qm, op.Im + index, op.Size);
|
||||
|
||||
Operand neShifted = context.ShiftLeft(me, Const(shift));
|
||||
|
||||
Operand de = EmitVectorExtractZx(context, op.Qd, op.Id + index, op.Size);
|
||||
|
||||
Operand deMasked = context.BitwiseAnd(de, Const(mask));
|
||||
|
||||
Operand e = context.BitwiseOr(neShifted, deMasked);
|
||||
|
||||
res = EmitVectorInsert(context, res, e, op.Id + index, op.Size);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Qd), res);
|
||||
}
|
||||
|
||||
public static void Vsra(ArmEmitterContext context)
|
||||
{
|
||||
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
|
||||
|
|
|
@ -571,6 +571,10 @@ namespace ARMeilleure.Instructions
|
|||
Umaal,
|
||||
Umlal,
|
||||
Umull,
|
||||
Uqadd16,
|
||||
Uqadd8,
|
||||
Uqsub16,
|
||||
Uqsub8,
|
||||
Usat,
|
||||
Usat16,
|
||||
Usub8,
|
||||
|
@ -645,6 +649,7 @@ namespace ARMeilleure.Instructions
|
|||
Vqdmulh,
|
||||
Vqmovn,
|
||||
Vqmovun,
|
||||
Vqrdmulh,
|
||||
Vqrshrn,
|
||||
Vqrshrun,
|
||||
Vqshrn,
|
||||
|
@ -666,6 +671,7 @@ namespace ARMeilleure.Instructions
|
|||
Vshll,
|
||||
Vshr,
|
||||
Vshrn,
|
||||
Vsli,
|
||||
Vst1,
|
||||
Vst2,
|
||||
Vst3,
|
||||
|
@ -682,6 +688,7 @@ namespace ARMeilleure.Instructions
|
|||
Vsub,
|
||||
Vsubl,
|
||||
Vsubw,
|
||||
Vswp,
|
||||
Vtbl,
|
||||
Vtrn,
|
||||
Vtst,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue