Move solution and projects to src

This commit is contained in:
TSR Berry 2023-04-08 01:22:00 +02:00 committed by Mary
parent cd124bda58
commit cee7121058
3466 changed files with 55 additions and 55 deletions

View file

@ -0,0 +1,351 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class AttributeMap
{
private enum StagesMask : byte
{
None = 0,
Compute = 1 << (int)ShaderStage.Compute,
Vertex = 1 << (int)ShaderStage.Vertex,
TessellationControl = 1 << (int)ShaderStage.TessellationControl,
TessellationEvaluation = 1 << (int)ShaderStage.TessellationEvaluation,
Geometry = 1 << (int)ShaderStage.Geometry,
Fragment = 1 << (int)ShaderStage.Fragment,
Tessellation = TessellationControl | TessellationEvaluation,
VertexTessellationGeometry = Vertex | Tessellation | Geometry,
TessellationGeometryFragment = Tessellation | Geometry | Fragment,
AllGraphics = Vertex | Tessellation | Geometry | Fragment
}
private struct AttributeEntry
{
public int BaseOffset { get; }
public AggregateType Type { get; }
public IoVariable IoVariable { get; }
public StagesMask InputMask { get; }
public StagesMask OutputMask { get; }
public AttributeEntry(
int baseOffset,
AggregateType type,
IoVariable ioVariable,
StagesMask inputMask,
StagesMask outputMask)
{
BaseOffset = baseOffset;
Type = type;
IoVariable = ioVariable;
InputMask = inputMask;
OutputMask = outputMask;
}
}
private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributes;
private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributesPerPatch;
static AttributeMap()
{
_attributes = CreateMap();
_attributesPerPatch = CreatePerPatchMap();
}
private static IReadOnlyDictionary<int, AttributeEntry> CreateMap()
{
var map = new Dictionary<int, AttributeEntry>();
Add(map, 0x060, AggregateType.S32, IoVariable.PrimitiveId, StagesMask.TessellationGeometryFragment, StagesMask.Geometry);
Add(map, 0x064, AggregateType.S32, IoVariable.Layer, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x068, AggregateType.S32, IoVariable.ViewportIndex, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x06c, AggregateType.FP32, IoVariable.PointSize, StagesMask.None, StagesMask.VertexTessellationGeometry);
Add(map, 0x070, AggregateType.Vector4 | AggregateType.FP32, IoVariable.Position, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x080, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.AllGraphics, StagesMask.VertexTessellationGeometry, 32);
Add(map, 0x280, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x290, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x2a0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x2b0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x2c0, AggregateType.Array | AggregateType.FP32, IoVariable.ClipDistance, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry, 8);
Add(map, 0x2e0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.PointCoord, StagesMask.Fragment, StagesMask.None);
Add(map, 0x2e8, AggregateType.FP32, IoVariable.FogCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x2f0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationCoord, StagesMask.TessellationEvaluation, StagesMask.None);
Add(map, 0x2f8, AggregateType.S32, IoVariable.InstanceId, StagesMask.Vertex, StagesMask.None);
Add(map, 0x2fc, AggregateType.S32, IoVariable.VertexId, StagesMask.Vertex, StagesMask.None);
Add(map, 0x300, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TextureCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x3a0, AggregateType.Array | AggregateType.S32, IoVariable.ViewportMask, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
Add(map, 0x3fc, AggregateType.Bool, IoVariable.FrontFacing, StagesMask.Fragment, StagesMask.None);
return map;
}
private static IReadOnlyDictionary<int, AttributeEntry> CreatePerPatchMap()
{
var map = new Dictionary<int, AttributeEntry>();
Add(map, 0x000, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TessellationLevelOuter, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
Add(map, 0x010, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationLevelInner, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
Add(map, 0x018, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.TessellationEvaluation, StagesMask.TessellationControl, 31, 0x200);
return map;
}
private static void Add(
Dictionary<int, AttributeEntry> attributes,
int offset,
AggregateType type,
IoVariable ioVariable,
StagesMask inputMask,
StagesMask outputMask,
int count = 1,
int upperBound = 0x400)
{
int baseOffset = offset;
int elementsCount = GetElementCount(type);
for (int index = 0; index < count; index++)
{
for (int elementIndex = 0; elementIndex < elementsCount; elementIndex++)
{
attributes.Add(offset, new AttributeEntry(baseOffset, type, ioVariable, inputMask, outputMask));
offset += 4;
if (offset >= upperBound)
{
return;
}
}
}
}
public static Operand GenerateAttributeLoad(EmitterContext context, Operand primVertex, int offset, bool isOutput, bool isPerPatch)
{
if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
return Const(0);
}
StagesMask validUseMask = isOutput ? entry.OutputMask : entry.InputMask;
if (((StagesMask)(1 << (int)context.Config.Stage) & validUseMask) == StagesMask.None)
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
return Const(0);
}
if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
return Const(0);
}
if (HasInvocationId(context.Config.Stage, isOutput) && !isPerPatch)
{
primVertex = context.Load(StorageKind.Input, IoVariable.InvocationId);
}
int innerOffset = offset - entry.BaseOffset;
int innerIndex = innerOffset / 4;
StorageKind storageKind = isPerPatch
? (isOutput ? StorageKind.OutputPerPatch : StorageKind.InputPerPatch)
: (isOutput ? StorageKind.Output : StorageKind.Input);
IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
AggregateType type = GetType(context.Config, isOutput, innerIndex, in entry);
int elementCount = GetElementCount(type);
bool isArray = type.HasFlag(AggregateType.Array);
bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput);
bool hasElementIndex = elementCount > 1;
if (hasArrayIndex && hasElementIndex)
{
int arrayIndex = innerIndex / elementCount;
int elementIndex = innerIndex - (arrayIndex * elementCount);
return primVertex == null || isArray
? context.Load(storageKind, ioVariable, primVertex, Const(arrayIndex), Const(elementIndex))
: context.Load(storageKind, ioVariable, Const(arrayIndex), primVertex, Const(elementIndex));
}
else if (hasArrayIndex || hasElementIndex)
{
return primVertex == null || isArray || !hasArrayIndex
? context.Load(storageKind, ioVariable, primVertex, Const(innerIndex))
: context.Load(storageKind, ioVariable, Const(innerIndex), primVertex);
}
else
{
return context.Load(storageKind, ioVariable, primVertex);
}
}
public static void GenerateAttributeStore(EmitterContext context, int offset, bool isPerPatch, Operand value)
{
if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
return;
}
if (((StagesMask)(1 << (int)context.Config.Stage) & entry.OutputMask) == StagesMask.None)
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
return;
}
if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
{
context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
return;
}
Operand invocationId = null;
if (HasInvocationId(context.Config.Stage, isOutput: true) && !isPerPatch)
{
invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
}
int innerOffset = offset - entry.BaseOffset;
int innerIndex = innerOffset / 4;
StorageKind storageKind = isPerPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
AggregateType type = GetType(context.Config, isOutput: true, innerIndex, in entry);
int elementCount = GetElementCount(type);
bool isArray = type.HasFlag(AggregateType.Array);
bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput: true);
bool hasElementIndex = elementCount > 1;
if (hasArrayIndex && hasElementIndex)
{
int arrayIndex = innerIndex / elementCount;
int elementIndex = innerIndex - (arrayIndex * elementCount);
if (invocationId == null || isArray)
{
context.Store(storageKind, ioVariable, invocationId, Const(arrayIndex), Const(elementIndex), value);
}
else
{
context.Store(storageKind, ioVariable, Const(arrayIndex), invocationId, Const(elementIndex), value);
}
}
else if (hasArrayIndex || hasElementIndex)
{
if (invocationId == null || isArray || !hasArrayIndex)
{
context.Store(storageKind, ioVariable, invocationId, Const(innerIndex), value);
}
else
{
context.Store(storageKind, ioVariable, Const(innerIndex), invocationId, value);
}
}
else
{
context.Store(storageKind, ioVariable, invocationId, value);
}
}
private static bool IsSupportedByHost(IGpuAccessor gpuAccessor, ShaderStage stage, IoVariable ioVariable)
{
if (ioVariable == IoVariable.ViewportIndex && stage != ShaderStage.Geometry && stage != ShaderStage.Fragment)
{
return gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation();
}
else if (ioVariable == IoVariable.ViewportMask)
{
return gpuAccessor.QueryHostSupportsViewportMask();
}
return true;
}
public static IoVariable GetIoVariable(ShaderConfig config, int offset, out int location)
{
location = 0;
if (!_attributes.TryGetValue(offset, out AttributeEntry entry))
{
return IoVariable.Invalid;
}
if (((StagesMask)(1 << (int)config.Stage) & entry.OutputMask) == StagesMask.None)
{
return IoVariable.Invalid;
}
if (config.HasPerLocationInputOrOutput(entry.IoVariable, isOutput: true))
{
location = (offset - entry.BaseOffset) / 16;
}
return GetIoVariable(config.Stage, in entry);
}
private static IoVariable GetIoVariable(ShaderStage stage, in AttributeEntry entry)
{
if (entry.IoVariable == IoVariable.Position && stage == ShaderStage.Fragment)
{
return IoVariable.FragmentCoord;
}
return entry.IoVariable;
}
private static AggregateType GetType(ShaderConfig config, bool isOutput, int innerIndex, in AttributeEntry entry)
{
AggregateType type = entry.Type;
if (entry.IoVariable == IoVariable.UserDefined)
{
type = config.GetUserDefinedType(innerIndex / 4, isOutput);
}
else if (entry.IoVariable == IoVariable.FragmentOutputColor)
{
type = config.GetFragmentOutputColorType(innerIndex / 4);
}
return type;
}
public static bool HasPrimitiveVertex(ShaderStage stage, bool isOutput)
{
if (isOutput)
{
return false;
}
return stage == ShaderStage.TessellationControl ||
stage == ShaderStage.TessellationEvaluation ||
stage == ShaderStage.Geometry;
}
public static bool HasInvocationId(ShaderStage stage, bool isOutput)
{
return isOutput && stage == ShaderStage.TessellationControl;
}
private static int GetElementCount(AggregateType type)
{
return (type & AggregateType.ElementCountMask) switch
{
AggregateType.Vector2 => 2,
AggregateType.Vector3 => 3,
AggregateType.Vector4 => 4,
_ => 1
};
}
}
}

View file

@ -0,0 +1,379 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void AtomCas(EmitterContext context)
{
InstAtomCas op = context.GetOp<InstAtomCas>();
context.Config.GpuAccessor.Log("Shader instruction AtomCas is not implemented.");
}
public static void AtomsCas(EmitterContext context)
{
InstAtomsCas op = context.GetOp<InstAtomsCas>();
context.Config.GpuAccessor.Log("Shader instruction AtomsCas is not implemented.");
}
public static void B2r(EmitterContext context)
{
InstB2r op = context.GetOp<InstB2r>();
context.Config.GpuAccessor.Log("Shader instruction B2r is not implemented.");
}
public static void Bpt(EmitterContext context)
{
InstBpt op = context.GetOp<InstBpt>();
context.Config.GpuAccessor.Log("Shader instruction Bpt is not implemented.");
}
public static void Cctl(EmitterContext context)
{
InstCctl op = context.GetOp<InstCctl>();
context.Config.GpuAccessor.Log("Shader instruction Cctl is not implemented.");
}
public static void Cctll(EmitterContext context)
{
InstCctll op = context.GetOp<InstCctll>();
context.Config.GpuAccessor.Log("Shader instruction Cctll is not implemented.");
}
public static void Cctlt(EmitterContext context)
{
InstCctlt op = context.GetOp<InstCctlt>();
context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
}
public static void Cs2r(EmitterContext context)
{
InstCs2r op = context.GetOp<InstCs2r>();
context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
}
public static void FchkR(EmitterContext context)
{
InstFchkR op = context.GetOp<InstFchkR>();
context.Config.GpuAccessor.Log("Shader instruction FchkR is not implemented.");
}
public static void FchkI(EmitterContext context)
{
InstFchkI op = context.GetOp<InstFchkI>();
context.Config.GpuAccessor.Log("Shader instruction FchkI is not implemented.");
}
public static void FchkC(EmitterContext context)
{
InstFchkC op = context.GetOp<InstFchkC>();
context.Config.GpuAccessor.Log("Shader instruction FchkC is not implemented.");
}
public static void Getcrsptr(EmitterContext context)
{
InstGetcrsptr op = context.GetOp<InstGetcrsptr>();
context.Config.GpuAccessor.Log("Shader instruction Getcrsptr is not implemented.");
}
public static void Getlmembase(EmitterContext context)
{
InstGetlmembase op = context.GetOp<InstGetlmembase>();
context.Config.GpuAccessor.Log("Shader instruction Getlmembase is not implemented.");
}
public static void Ide(EmitterContext context)
{
InstIde op = context.GetOp<InstIde>();
context.Config.GpuAccessor.Log("Shader instruction Ide is not implemented.");
}
public static void IdpR(EmitterContext context)
{
InstIdpR op = context.GetOp<InstIdpR>();
context.Config.GpuAccessor.Log("Shader instruction IdpR is not implemented.");
}
public static void IdpC(EmitterContext context)
{
InstIdpC op = context.GetOp<InstIdpC>();
context.Config.GpuAccessor.Log("Shader instruction IdpC is not implemented.");
}
public static void ImadspR(EmitterContext context)
{
InstImadspR op = context.GetOp<InstImadspR>();
context.Config.GpuAccessor.Log("Shader instruction ImadspR is not implemented.");
}
public static void ImadspI(EmitterContext context)
{
InstImadspI op = context.GetOp<InstImadspI>();
context.Config.GpuAccessor.Log("Shader instruction ImadspI is not implemented.");
}
public static void ImadspC(EmitterContext context)
{
InstImadspC op = context.GetOp<InstImadspC>();
context.Config.GpuAccessor.Log("Shader instruction ImadspC is not implemented.");
}
public static void ImadspRc(EmitterContext context)
{
InstImadspRc op = context.GetOp<InstImadspRc>();
context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
}
public static void Jcal(EmitterContext context)
{
InstJcal op = context.GetOp<InstJcal>();
context.Config.GpuAccessor.Log("Shader instruction Jcal is not implemented.");
}
public static void Jmp(EmitterContext context)
{
InstJmp op = context.GetOp<InstJmp>();
context.Config.GpuAccessor.Log("Shader instruction Jmp is not implemented.");
}
public static void Jmx(EmitterContext context)
{
InstJmx op = context.GetOp<InstJmx>();
context.Config.GpuAccessor.Log("Shader instruction Jmx is not implemented.");
}
public static void Ld(EmitterContext context)
{
InstLd op = context.GetOp<InstLd>();
context.Config.GpuAccessor.Log("Shader instruction Ld is not implemented.");
}
public static void Lepc(EmitterContext context)
{
InstLepc op = context.GetOp<InstLepc>();
context.Config.GpuAccessor.Log("Shader instruction Lepc is not implemented.");
}
public static void Longjmp(EmitterContext context)
{
InstLongjmp op = context.GetOp<InstLongjmp>();
context.Config.GpuAccessor.Log("Shader instruction Longjmp is not implemented.");
}
public static void P2rR(EmitterContext context)
{
InstP2rR op = context.GetOp<InstP2rR>();
context.Config.GpuAccessor.Log("Shader instruction P2rR is not implemented.");
}
public static void P2rI(EmitterContext context)
{
InstP2rI op = context.GetOp<InstP2rI>();
context.Config.GpuAccessor.Log("Shader instruction P2rI is not implemented.");
}
public static void P2rC(EmitterContext context)
{
InstP2rC op = context.GetOp<InstP2rC>();
context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
}
public static void Pexit(EmitterContext context)
{
InstPexit op = context.GetOp<InstPexit>();
context.Config.GpuAccessor.Log("Shader instruction Pexit is not implemented.");
}
public static void Pixld(EmitterContext context)
{
InstPixld op = context.GetOp<InstPixld>();
context.Config.GpuAccessor.Log("Shader instruction Pixld is not implemented.");
}
public static void Plongjmp(EmitterContext context)
{
InstPlongjmp op = context.GetOp<InstPlongjmp>();
context.Config.GpuAccessor.Log("Shader instruction Plongjmp is not implemented.");
}
public static void Pret(EmitterContext context)
{
InstPret op = context.GetOp<InstPret>();
context.Config.GpuAccessor.Log("Shader instruction Pret is not implemented.");
}
public static void PrmtR(EmitterContext context)
{
InstPrmtR op = context.GetOp<InstPrmtR>();
context.Config.GpuAccessor.Log("Shader instruction PrmtR is not implemented.");
}
public static void PrmtI(EmitterContext context)
{
InstPrmtI op = context.GetOp<InstPrmtI>();
context.Config.GpuAccessor.Log("Shader instruction PrmtI is not implemented.");
}
public static void PrmtC(EmitterContext context)
{
InstPrmtC op = context.GetOp<InstPrmtC>();
context.Config.GpuAccessor.Log("Shader instruction PrmtC is not implemented.");
}
public static void PrmtRc(EmitterContext context)
{
InstPrmtRc op = context.GetOp<InstPrmtRc>();
context.Config.GpuAccessor.Log("Shader instruction PrmtRc is not implemented.");
}
public static void R2b(EmitterContext context)
{
InstR2b op = context.GetOp<InstR2b>();
context.Config.GpuAccessor.Log("Shader instruction R2b is not implemented.");
}
public static void Ram(EmitterContext context)
{
InstRam op = context.GetOp<InstRam>();
context.Config.GpuAccessor.Log("Shader instruction Ram is not implemented.");
}
public static void Rtt(EmitterContext context)
{
InstRtt op = context.GetOp<InstRtt>();
context.Config.GpuAccessor.Log("Shader instruction Rtt is not implemented.");
}
public static void Sam(EmitterContext context)
{
InstSam op = context.GetOp<InstSam>();
context.Config.GpuAccessor.Log("Shader instruction Sam is not implemented.");
}
public static void Setcrsptr(EmitterContext context)
{
InstSetcrsptr op = context.GetOp<InstSetcrsptr>();
context.Config.GpuAccessor.Log("Shader instruction Setcrsptr is not implemented.");
}
public static void Setlmembase(EmitterContext context)
{
InstSetlmembase op = context.GetOp<InstSetlmembase>();
context.Config.GpuAccessor.Log("Shader instruction Setlmembase is not implemented.");
}
public static void St(EmitterContext context)
{
InstSt op = context.GetOp<InstSt>();
context.Config.GpuAccessor.Log("Shader instruction St is not implemented.");
}
public static void Stp(EmitterContext context)
{
InstStp op = context.GetOp<InstStp>();
context.Config.GpuAccessor.Log("Shader instruction Stp is not implemented.");
}
public static void Txa(EmitterContext context)
{
InstTxa op = context.GetOp<InstTxa>();
context.Config.GpuAccessor.Log("Shader instruction Txa is not implemented.");
}
public static void Vabsdiff(EmitterContext context)
{
InstVabsdiff op = context.GetOp<InstVabsdiff>();
context.Config.GpuAccessor.Log("Shader instruction Vabsdiff is not implemented.");
}
public static void Vabsdiff4(EmitterContext context)
{
InstVabsdiff4 op = context.GetOp<InstVabsdiff4>();
context.Config.GpuAccessor.Log("Shader instruction Vabsdiff4 is not implemented.");
}
public static void Vadd(EmitterContext context)
{
InstVadd op = context.GetOp<InstVadd>();
context.Config.GpuAccessor.Log("Shader instruction Vadd is not implemented.");
}
public static void Votevtg(EmitterContext context)
{
InstVotevtg op = context.GetOp<InstVotevtg>();
context.Config.GpuAccessor.Log("Shader instruction Votevtg is not implemented.");
}
public static void Vset(EmitterContext context)
{
InstVset op = context.GetOp<InstVset>();
context.Config.GpuAccessor.Log("Shader instruction Vset is not implemented.");
}
public static void Vshl(EmitterContext context)
{
InstVshl op = context.GetOp<InstVshl>();
context.Config.GpuAccessor.Log("Shader instruction Vshl is not implemented.");
}
public static void Vshr(EmitterContext context)
{
InstVshr op = context.GetOp<InstVshr>();
context.Config.GpuAccessor.Log("Shader instruction Vshr is not implemented.");
}
}
}

View file

@ -0,0 +1,160 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class InstEmitAluHelper
{
public static long GetIntMin(IDstFmt type)
{
return type switch
{
IDstFmt.U16 => ushort.MinValue,
IDstFmt.S16 => short.MinValue,
IDstFmt.U32 => uint.MinValue,
IDstFmt.S32 => int.MinValue,
_ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
};
}
public static long GetIntMax(IDstFmt type)
{
return type switch
{
IDstFmt.U16 => ushort.MaxValue,
IDstFmt.S16 => short.MaxValue,
IDstFmt.U32 => uint.MaxValue,
IDstFmt.S32 => int.MaxValue,
_ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
};
}
public static long GetIntMin(ISrcDstFmt type)
{
return type switch
{
ISrcDstFmt.U8 => byte.MinValue,
ISrcDstFmt.S8 => sbyte.MinValue,
ISrcDstFmt.U16 => ushort.MinValue,
ISrcDstFmt.S16 => short.MinValue,
ISrcDstFmt.U32 => uint.MinValue,
ISrcDstFmt.S32 => int.MinValue,
_ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
};
}
public static long GetIntMax(ISrcDstFmt type)
{
return type switch
{
ISrcDstFmt.U8 => byte.MaxValue,
ISrcDstFmt.S8 => sbyte.MaxValue,
ISrcDstFmt.U16 => ushort.MaxValue,
ISrcDstFmt.S16 => short.MaxValue,
ISrcDstFmt.U32 => uint.MaxValue,
ISrcDstFmt.S32 => int.MaxValue,
_ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
};
}
public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
{
return logicOp switch
{
BoolOp.And => context.BitwiseAnd(input, pred),
BoolOp.Or => context.BitwiseOr(input, pred),
BoolOp.Xor => context.BitwiseExclusiveOr(input, pred),
_ => input
};
}
public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
{
return type switch
{
VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
_ => src
};
}
public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
{
if (!setCC)
{
return;
}
if (extended)
{
// When the operation is extended, it means we are doing
// the operation on a long word with any number of bits,
// so we need to AND the zero flag from result with the
// previous result when extended is specified, to ensure
// we have ZF set only if all words are zero, and not just
// the last one.
Operand oldZF = GetZF();
Operand res = context.BitwiseAnd(context.ICompareEqual(dest, Const(0)), oldZF);
context.Copy(GetZF(), res);
}
else
{
context.Copy(GetZF(), context.ICompareEqual(dest, Const(0)));
}
context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
}
public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
{
if (setCC)
{
Operand zero = ConstF(0);
if (fpType == Instruction.FP64)
{
zero = context.FP32ConvertToFP64(zero);
}
context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
context.Copy(GetNF(), context.FPCompareLess (dest, zero, fpType));
}
}
public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
{
low = context.BitwiseNot(low);
high = context.BitwiseNot(high);
low = AddWithCarry(context, low, Const(1), out Operand carryOut);
high = context.IAdd(high, carryOut);
return (low, high);
}
public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
{
Operand result = context.IAdd(lhs, rhs);
// C = Rd < Rn
carryOut = context.INegate(context.ICompareLessUnsigned(result, lhs));
return result;
}
}
}

View file

@ -0,0 +1,383 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Al2p(EmitterContext context)
{
InstAl2p op = context.GetOp<InstAl2p>();
context.Copy(GetDest(op.Dest), context.IAdd(GetSrcReg(context, op.SrcA), Const(op.Imm11)));
}
public static void Ald(EmitterContext context)
{
InstAld op = context.GetOp<InstAld>();
// Some of those attributes are per invocation,
// so we should ignore any primitive vertex indexing for those.
bool hasPrimitiveVertex = AttributeMap.HasPrimitiveVertex(context.Config.Stage, op.O) && !op.P;
if (!op.Phys)
{
hasPrimitiveVertex &= HasPrimitiveVertex(op.Imm11);
}
Operand primVertex = hasPrimitiveVertex ? context.Copy(GetSrcReg(context, op.SrcB)) : null;
for (int index = 0; index < (int)op.AlSize + 1; index++)
{
Register rd = new Register(op.Dest + index, RegisterType.Gpr);
if (rd.IsRZ)
{
break;
}
if (op.Phys)
{
Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
Operand vecIndex = context.ShiftRightU32(offset, Const(4));
Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
StorageKind storageKind = op.O ? StorageKind.Output : StorageKind.Input;
context.Copy(Register(rd), context.Load(storageKind, IoVariable.UserDefined, primVertex, vecIndex, elemIndex));
}
else if (op.SrcB == RegisterConsts.RegisterZeroIndex || op.P)
{
int offset = FixedFuncToUserAttribute(context.Config, op.Imm11 + index * 4, op.O);
context.FlagAttributeRead(offset);
bool isOutput = op.O && CanLoadOutput(offset);
if (!op.P && !isOutput && TryConvertIdToIndexForVulkan(context, offset, out Operand value))
{
context.Copy(Register(rd), value);
}
else
{
context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, op.P));
}
}
else
{
int offset = FixedFuncToUserAttribute(context.Config, op.Imm11 + index * 4, op.O);
context.FlagAttributeRead(offset);
bool isOutput = op.O && CanLoadOutput(offset);
context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, false));
}
}
}
public static void Ast(EmitterContext context)
{
InstAst op = context.GetOp<InstAst>();
for (int index = 0; index < (int)op.AlSize + 1; index++)
{
if (op.SrcB + index > RegisterConsts.RegisterZeroIndex)
{
break;
}
Register rd = new Register(op.SrcB + index, RegisterType.Gpr);
if (op.Phys)
{
Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
Operand vecIndex = context.ShiftRightU32(offset, Const(4));
Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
Operand invocationId = AttributeMap.HasInvocationId(context.Config.Stage, isOutput: true)
? context.Load(StorageKind.Input, IoVariable.InvocationId)
: null;
context.Store(StorageKind.Output, IoVariable.UserDefined, invocationId, vecIndex, elemIndex, Register(rd));
}
else
{
// TODO: Support indirect stores using Ra.
int offset = op.Imm11 + index * 4;
if (!context.Config.IsUsedOutputAttribute(offset))
{
return;
}
offset = FixedFuncToUserAttribute(context.Config, offset, isOutput: true);
context.FlagAttributeWritten(offset);
AttributeMap.GenerateAttributeStore(context, offset, op.P, Register(rd));
}
}
}
public static void Ipa(EmitterContext context)
{
InstIpa op = context.GetOp<InstIpa>();
context.FlagAttributeRead(op.Imm10);
Operand res;
bool isFixedFunc = false;
if (op.Idx)
{
Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
Operand vecIndex = context.ShiftRightU32(offset, Const(4));
Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
res = context.Load(StorageKind.Input, IoVariable.UserDefined, null, vecIndex, elemIndex);
res = context.FPMultiply(res, context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3)));
}
else
{
isFixedFunc = TryFixedFuncToUserAttributeIpa(context, op.Imm10, out res);
if (op.Imm10 >= AttributeConsts.UserAttributeBase && op.Imm10 < AttributeConsts.UserAttributeEnd)
{
int index = (op.Imm10 - AttributeConsts.UserAttributeBase) >> 4;
if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
{
res = context.FPMultiply(res, context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3)));
}
}
else if (op.Imm10 == AttributeConsts.PositionX || op.Imm10 == AttributeConsts.PositionY)
{
// FragCoord X/Y must be divided by the render target scale, if resolution scaling is active,
// because the shader code is not expecting scaled values.
res = context.FPDivide(res, context.Load(StorageKind.Input, IoVariable.SupportBlockRenderScale, null, Const(0)));
}
else if (op.Imm10 == AttributeConsts.FrontFacing && context.Config.GpuAccessor.QueryHostHasFrontFacingBug())
{
// gl_FrontFacing sometimes has incorrect (flipped) values depending how it is accessed on Intel GPUs.
// This weird trick makes it behave.
res = context.ICompareLess(context.INegate(context.IConvertS32ToFP32(res)), Const(0));
}
}
if (op.IpaOp == IpaOp.Multiply && !isFixedFunc)
{
Operand srcB = GetSrcReg(context, op.SrcB);
res = context.FPMultiply(res, srcB);
}
res = context.FPSaturate(res, op.Sat);
context.Copy(GetDest(op.Dest), res);
}
public static void Isberd(EmitterContext context)
{
InstIsberd op = context.GetOp<InstIsberd>();
// This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory.
// Here, we just propagate the offset, as the result from this instruction is usually
// used with ALD to perform vertex load on geometry or tessellation shaders.
// The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex.
// Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex.
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
}
public static void OutR(EmitterContext context)
{
InstOutR op = context.GetOp<InstOutR>();
EmitOut(context, op.OutType.HasFlag(OutType.Emit), op.OutType.HasFlag(OutType.Cut));
}
public static void OutI(EmitterContext context)
{
InstOutI op = context.GetOp<InstOutI>();
EmitOut(context, op.OutType.HasFlag(OutType.Emit), op.OutType.HasFlag(OutType.Cut));
}
public static void OutC(EmitterContext context)
{
InstOutC op = context.GetOp<InstOutC>();
EmitOut(context, op.OutType.HasFlag(OutType.Emit), op.OutType.HasFlag(OutType.Cut));
}
private static void EmitOut(EmitterContext context, bool emit, bool cut)
{
if (!(emit || cut))
{
context.Config.GpuAccessor.Log("Invalid OUT encoding.");
}
if (emit)
{
if (context.Config.LastInVertexPipeline)
{
context.PrepareForVertexReturn(out var tempXLocal, out var tempYLocal, out var tempZLocal);
context.EmitVertex();
// Restore output position value before transformation.
if (tempXLocal != null)
{
context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(0)), tempXLocal);
}
if (tempYLocal != null)
{
context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(1)), tempYLocal);
}
if (tempZLocal != null)
{
context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(2)), tempZLocal);
}
}
else
{
context.EmitVertex();
}
}
if (cut)
{
context.EndPrimitive();
}
}
private static bool HasPrimitiveVertex(int attr)
{
return attr != AttributeConsts.PrimitiveId &&
attr != AttributeConsts.TessCoordX &&
attr != AttributeConsts.TessCoordY;
}
private static bool CanLoadOutput(int attr)
{
return attr != AttributeConsts.TessCoordX && attr != AttributeConsts.TessCoordY;
}
private static bool TryFixedFuncToUserAttributeIpa(EmitterContext context, int attr, out Operand selectedAttr)
{
if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.BackColorDiffuseR)
{
// TODO: If two sided rendering is enabled, then this should return
// FrontColor if the fragment is front facing, and back color otherwise.
selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
return true;
}
else if (attr == AttributeConsts.FogCoord)
{
// TODO: We likely need to emulate the fixed-function functionality for FogCoord here.
selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
return true;
}
else if (attr >= AttributeConsts.BackColorDiffuseR && attr < AttributeConsts.ClipDistance0)
{
selectedAttr = ConstF(((attr >> 2) & 3) == 3 ? 1f : 0f);
return true;
}
else if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
{
selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
return true;
}
selectedAttr = GenerateIpaLoad(context, attr);
return false;
}
private static Operand GenerateIpaLoad(EmitterContext context, int offset)
{
return AttributeMap.GenerateAttributeLoad(context, null, offset, isOutput: false, isPerPatch: false);
}
private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput)
{
bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;
if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !supportsLayerFromVertexOrTess)
{
attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput);
config.SetLayerOutputAttribute(attr);
}
else if (attr == AttributeConsts.FogCoord)
{
attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FogCoord, fixedStartAttr, isOutput);
}
else if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0)
{
attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, fixedStartAttr + 1, isOutput);
}
else if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
{
attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, fixedStartAttr + 5, isOutput);
}
return attr;
}
private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, int baseAttr, int baseIndex, bool isOutput)
{
int index = (attr - baseAttr) >> 4;
int userAttrIndex = config.GetFreeUserAttribute(isOutput, baseIndex + index);
if ((uint)userAttrIndex < Constants.MaxAttributes)
{
attr = AttributeConsts.UserAttributeBase + userAttrIndex * 16 + (attr & 0xf);
if (isOutput)
{
config.SetOutputUserAttributeFixedFunc(userAttrIndex);
}
else
{
config.SetInputUserAttributeFixedFunc(userAttrIndex);
}
}
else
{
config.GpuAccessor.Log($"No enough user attributes for fixed attribute offset 0x{attr:X}.");
}
return attr;
}
private static bool TryConvertIdToIndexForVulkan(EmitterContext context, int attr, out Operand value)
{
if (context.Config.Options.TargetApi == TargetApi.Vulkan)
{
if (attr == AttributeConsts.InstanceId)
{
value = context.ISubtract(
context.Load(StorageKind.Input, IoVariable.InstanceIndex),
context.Load(StorageKind.Input, IoVariable.BaseInstance));
return true;
}
else if (attr == AttributeConsts.VertexId)
{
value = context.Load(StorageKind.Input, IoVariable.VertexIndex);
return true;
}
}
value = null;
return false;
}
}
}

View file

@ -0,0 +1,44 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Bar(EmitterContext context)
{
InstBar op = context.GetOp<InstBar>();
// TODO: Support other modes.
if (op.BarOp == BarOp.Sync)
{
context.Barrier();
}
else
{
context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.BarOp}.");
}
}
public static void Depbar(EmitterContext context)
{
InstDepbar op = context.GetOp<InstDepbar>();
// No operation.
}
public static void Membar(EmitterContext context)
{
InstMembar op = context.GetOp<InstMembar>();
if (op.Membar == Decoders.Membar.Cta)
{
context.GroupMemoryBarrier();
}
else
{
context.MemoryBarrier();
}
}
}
}

View file

@ -0,0 +1,194 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void BfeR(EmitterContext context)
{
InstBfeR op = context.GetOp<InstBfeR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitBfe(context, srcA, srcB, op.Dest, op.Brev, op.Signed);
}
public static void BfeI(EmitterContext context)
{
InstBfeI op = context.GetOp<InstBfeI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitBfe(context, srcA, srcB, op.Dest, op.Brev, op.Signed);
}
public static void BfeC(EmitterContext context)
{
InstBfeC op = context.GetOp<InstBfeC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitBfe(context, srcA, srcB, op.Dest, op.Brev, op.Signed);
}
public static void BfiR(EmitterContext context)
{
InstBfiR op = context.GetOp<InstBfiR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitBfi(context, srcA, srcB, srcC, op.Dest);
}
public static void BfiI(EmitterContext context)
{
InstBfiI op = context.GetOp<InstBfiI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitBfi(context, srcA, srcB, srcC, op.Dest);
}
public static void BfiC(EmitterContext context)
{
InstBfiC op = context.GetOp<InstBfiC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitBfi(context, srcA, srcB, srcC, op.Dest);
}
public static void BfiRc(EmitterContext context)
{
InstBfiRc op = context.GetOp<InstBfiRc>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcC);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitBfi(context, srcA, srcB, srcC, op.Dest);
}
public static void FloR(EmitterContext context)
{
InstFloR op = context.GetOp<InstFloR>();
EmitFlo(context, GetSrcReg(context, op.SrcB), op.Dest, op.NegB, op.Sh, op.Signed);
}
public static void FloI(EmitterContext context)
{
InstFloI op = context.GetOp<InstFloI>();
EmitFlo(context, GetSrcImm(context, Imm20ToSInt(op.Imm20)), op.Dest, op.NegB, op.Sh, op.Signed);
}
public static void FloC(EmitterContext context)
{
InstFloC op = context.GetOp<InstFloC>();
EmitFlo(context, GetSrcCbuf(context, op.CbufSlot, op.CbufOffset), op.Dest, op.NegB, op.Sh, op.Signed);
}
public static void PopcR(EmitterContext context)
{
InstPopcR op = context.GetOp<InstPopcR>();
EmitPopc(context, GetSrcReg(context, op.SrcB), op.Dest, op.NegB);
}
public static void PopcI(EmitterContext context)
{
InstPopcI op = context.GetOp<InstPopcI>();
EmitPopc(context, GetSrcImm(context, Imm20ToSInt(op.Imm20)), op.Dest, op.NegB);
}
public static void PopcC(EmitterContext context)
{
InstPopcC op = context.GetOp<InstPopcC>();
EmitPopc(context, GetSrcCbuf(context, op.CbufSlot, op.CbufOffset), op.Dest, op.NegB);
}
private static void EmitBfe(
EmitterContext context,
Operand srcA,
Operand srcB,
int rd,
bool bitReverse,
bool isSigned)
{
if (bitReverse)
{
srcA = context.BitfieldReverse(srcA);
}
Operand position = context.BitwiseAnd(srcB, Const(0xff));
Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));
Operand res = isSigned
? context.BitfieldExtractS32(srcA, position, size)
: context.BitfieldExtractU32(srcA, position, size);
context.Copy(GetDest(rd), res);
// TODO: CC, X, corner cases.
}
private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
{
Operand position = context.BitwiseAnd(srcB, Const(0xff));
Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));
Operand res = context.BitfieldInsert(srcC, srcA, position, size);
context.Copy(GetDest(rd), res);
}
private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
{
Operand srcB = context.BitwiseNot(src, invert);
Operand res;
if (sh)
{
res = context.FindLSB(context.BitfieldReverse(srcB));
}
else
{
res = isSigned
? context.FindMSBS32(srcB)
: context.FindMSBU32(srcB);
}
context.Copy(GetDest(rd), res);
}
private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
{
Operand srcB = context.BitwiseNot(src, invert);
Operand res = context.BitCount(srcB);
context.Copy(GetDest(rd), res);
}
}
}

View file

@ -0,0 +1,87 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Cset(EmitterContext context)
{
InstCset op = context.GetOp<InstCset>();
Operand res = GetCondition(context, op.Ccc);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
res = GetPredLogicalOp(context, op.Bop, res, srcPred);
Operand dest = GetDest(op.Dest);
if (op.BVal)
{
context.Copy(dest, context.ConditionalSelect(res, ConstF(1), Const(0)));
}
else
{
context.Copy(dest, res);
}
// TODO: CC.
}
public static void Csetp(EmitterContext context)
{
InstCsetp op = context.GetOp<InstCsetp>();
Operand p0Res = GetCondition(context, op.Ccc);
Operand p1Res = context.BitwiseNot(p0Res);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
p0Res = GetPredLogicalOp(context, op.Bop, p0Res, srcPred);
p1Res = GetPredLogicalOp(context, op.Bop, p1Res, srcPred);
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
// TODO: CC.
}
private static Operand GetCondition(EmitterContext context, Ccc cond, int defaultCond = IrConsts.True)
{
return cond switch
{
Ccc.F => Const(IrConsts.False),
Ccc.Lt => context.BitwiseExclusiveOr(context.BitwiseAnd(GetNF(), context.BitwiseNot(GetZF())), GetVF()),
Ccc.Eq => context.BitwiseAnd(context.BitwiseNot(GetNF()), GetZF()),
Ccc.Le => context.BitwiseExclusiveOr(GetNF(), context.BitwiseOr(GetZF(), GetVF())),
Ccc.Gt => context.BitwiseNot(context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF())),
Ccc.Ne => context.BitwiseNot(GetZF()),
Ccc.Ge => context.BitwiseNot(context.BitwiseExclusiveOr(GetNF(), GetVF())),
Ccc.Num => context.BitwiseNot(context.BitwiseAnd(GetNF(), GetZF())),
Ccc.Nan => context.BitwiseAnd(GetNF(), GetZF()),
Ccc.Ltu => context.BitwiseExclusiveOr(GetNF(), GetVF()),
Ccc.Equ => GetZF(),
Ccc.Leu => context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF()),
Ccc.Gtu => context.BitwiseExclusiveOr(context.BitwiseNot(GetNF()), context.BitwiseOr(GetVF(), GetZF())),
Ccc.Neu => context.BitwiseOr(GetNF(), context.BitwiseNot(GetZF())),
Ccc.Geu => context.BitwiseExclusiveOr(context.BitwiseOr(context.BitwiseNot(GetNF()), GetZF()), GetVF()),
Ccc.T => Const(IrConsts.True),
Ccc.Off => context.BitwiseNot(GetVF()),
Ccc.Lo => context.BitwiseNot(GetCF()),
Ccc.Sff => context.BitwiseNot(GetNF()),
Ccc.Ls => context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF())),
Ccc.Hi => context.BitwiseAnd(GetCF(), context.BitwiseNot(GetZF())),
Ccc.Sft => GetNF(),
Ccc.Hs => GetCF(),
Ccc.Oft => GetVF(),
Ccc.Rle => context.BitwiseOr(GetNF(), GetZF()),
Ccc.Rgt => context.BitwiseNot(context.BitwiseOr(GetNF(), GetZF())),
_ => Const(defaultCond)
};
}
}
}

View file

@ -0,0 +1,425 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void F2fR(EmitterContext context)
{
InstF2fR op = context.GetOp<InstF2fR>();
var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);
EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
}
public static void F2fI(EmitterContext context)
{
InstF2fI op = context.GetOp<InstF2fI>();
var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));
EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
}
public static void F2fC(EmitterContext context)
{
InstF2fC op = context.GetOp<InstF2fC>();
var src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);
EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
}
public static void F2iR(EmitterContext context)
{
InstF2iR op = context.GetOp<InstF2iR>();
var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);
EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
}
public static void F2iI(EmitterContext context)
{
InstF2iI op = context.GetOp<InstF2iI>();
var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));
EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
}
public static void F2iC(EmitterContext context)
{
InstF2iC op = context.GetOp<InstF2iC>();
var src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);
EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
}
public static void I2fR(EmitterContext context)
{
InstI2fR op = context.GetOp<InstI2fR>();
var src = GetSrcReg(context, op.SrcB);
EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
public static void I2fI(EmitterContext context)
{
InstI2fI op = context.GetOp<InstI2fI>();
var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
public static void I2fC(EmitterContext context)
{
InstI2fC op = context.GetOp<InstI2fC>();
var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
}
public static void I2iR(EmitterContext context)
{
InstI2iR op = context.GetOp<InstI2iR>();
var src = GetSrcReg(context, op.SrcB);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
public static void I2iI(EmitterContext context)
{
InstI2iI op = context.GetOp<InstI2iI>();
var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
public static void I2iC(EmitterContext context)
{
InstI2iC op = context.GetOp<InstI2iC>();
var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
private static void EmitF2F(
EmitterContext context,
DstFmt srcType,
DstFmt dstType,
IntegerRound roundingMode,
Operand src,
int rd,
bool absolute,
bool negate,
bool saturate)
{
Operand srcB = context.FPAbsNeg(src, absolute, negate, srcType.ToInstFPType());
if (srcType == dstType)
{
srcB = roundingMode switch
{
IntegerRound.Round => context.FPRound(srcB, srcType.ToInstFPType()),
IntegerRound.Floor => context.FPFloor(srcB, srcType.ToInstFPType()),
IntegerRound.Ceil => context.FPCeiling(srcB, srcType.ToInstFPType()),
IntegerRound.Trunc => context.FPTruncate(srcB, srcType.ToInstFPType()),
_ => srcB
};
}
// We don't need to handle conversions between FP16 <-> FP32
// since we do FP16 operations as FP32 directly.
// FP16 <-> FP64 conversions are invalid.
if (srcType == DstFmt.F32 && dstType == DstFmt.F64)
{
srcB = context.FP32ConvertToFP64(srcB);
}
else if (srcType == DstFmt.F64 && dstType == DstFmt.F32)
{
srcB = context.FP64ConvertToFP32(srcB);
}
srcB = context.FPSaturate(srcB, saturate, dstType.ToInstFPType());
WriteFP(context, dstType, srcB, rd);
// TODO: CC.
}
private static void EmitF2I(
EmitterContext context,
DstFmt srcType,
IDstFmt dstType,
RoundMode2 roundingMode,
Operand src,
int rd,
bool absolute,
bool negate)
{
if (dstType == IDstFmt.U64)
{
context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
}
Instruction fpType = srcType.ToInstFPType();
bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;
Operand srcB = context.FPAbsNeg(src, absolute, negate, fpType);
srcB = roundingMode switch
{
RoundMode2.Round => context.FPRound(srcB, fpType),
RoundMode2.Floor => context.FPFloor(srcB, fpType),
RoundMode2.Ceil => context.FPCeiling(srcB, fpType),
RoundMode2.Trunc => context.FPTruncate(srcB, fpType),
_ => srcB
};
if (!isSignedInt)
{
// Negative float to uint cast is undefined, so we clamp the value before conversion.
Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);
srcB = context.FPMaximum(srcB, c0, fpType);
}
if (srcType == DstFmt.F64)
{
srcB = isSignedInt
? context.FP64ConvertToS32(srcB)
: context.FP64ConvertToU32(srcB);
}
else
{
srcB = isSignedInt
? context.FP32ConvertToS32(srcB)
: context.FP32ConvertToU32(srcB);
}
if (isSmallInt)
{
int min = (int)GetIntMin(dstType);
int max = (int)GetIntMax(dstType);
srcB = isSignedInt
? context.IClampS32(srcB, Const(min), Const(max))
: context.IClampU32(srcB, Const(min), Const(max));
}
Operand dest = GetDest(rd);
context.Copy(dest, srcB);
// TODO: CC.
}
private static void EmitI2F(
EmitterContext context,
ISrcFmt srcType,
DstFmt dstType,
Operand src,
ByteSel byteSelection,
int rd,
bool absolute,
bool negate)
{
bool isSignedInt =
srcType == ISrcFmt.S8 ||
srcType == ISrcFmt.S16 ||
srcType == ISrcFmt.S32 ||
srcType == ISrcFmt.S64;
bool isSmallInt =
srcType == ISrcFmt.U16 ||
srcType == ISrcFmt.S16 ||
srcType == ISrcFmt.U8 ||
srcType == ISrcFmt.S8;
// TODO: Handle S/U64.
Operand srcB = context.IAbsNeg(src, absolute, negate);
if (isSmallInt)
{
int size = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16 ? 16 : 8;
srcB = isSignedInt
? context.BitfieldExtractS32(srcB, Const((int)byteSelection * 8), Const(size))
: context.BitfieldExtractU32(srcB, Const((int)byteSelection * 8), Const(size));
}
if (dstType == DstFmt.F64)
{
srcB = isSignedInt
? context.IConvertS32ToFP64(srcB)
: context.IConvertU32ToFP64(srcB);
}
else
{
srcB = isSignedInt
? context.IConvertS32ToFP32(srcB)
: context.IConvertU32ToFP32(srcB);
}
WriteFP(context, dstType, srcB, rd);
// TODO: CC.
}
private static void EmitI2I(
EmitterContext context,
ISrcDstFmt srcType,
ISrcDstFmt dstType,
Operand src,
ByteSel byteSelection,
int rd,
bool absolute,
bool negate,
bool saturate,
bool writeCC)
{
if ((srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32 || (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32)
{
context.Config.GpuAccessor.Log("Invalid I2I encoding.");
return;
}
bool srcIsSignedInt =
srcType == ISrcDstFmt.S8 ||
srcType == ISrcDstFmt.S16 ||
srcType == ISrcDstFmt.S32;
bool dstIsSignedInt =
dstType == ISrcDstFmt.S8 ||
dstType == ISrcDstFmt.S16 ||
dstType == ISrcDstFmt.S32;
bool srcIsSmallInt =
srcType == ISrcDstFmt.U16 ||
srcType == ISrcDstFmt.S16 ||
srcType == ISrcDstFmt.U8 ||
srcType == ISrcDstFmt.S8;
if (srcIsSmallInt)
{
int size = srcType == ISrcDstFmt.U16 || srcType == ISrcDstFmt.S16 ? 16 : 8;
src = srcIsSignedInt
? context.BitfieldExtractS32(src, Const((int)byteSelection * 8), Const(size))
: context.BitfieldExtractU32(src, Const((int)byteSelection * 8), Const(size));
}
src = context.IAbsNeg(src, absolute, negate);
if (saturate)
{
int min = (int)GetIntMin(dstType);
int max = (int)GetIntMax(dstType);
src = dstIsSignedInt
? context.IClampS32(src, Const(min), Const(max))
: context.IClampU32(src, Const(min), Const(max));
}
context.Copy(GetDest(rd), src);
SetZnFlags(context, src, writeCC);
}
private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
{
if (floatType == DstFmt.F32)
{
return GetSrcReg(context, reg);
}
else if (floatType == DstFmt.F16)
{
return GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16)[h ? 1 : 0];
}
else if (floatType == DstFmt.F64)
{
return GetSrcReg(context, reg, isFP64: true);
}
throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
}
private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
{
if (floatType == DstFmt.F32)
{
return GetSrcCbuf(context, cbufSlot, cbufOffset);
}
else if (floatType == DstFmt.F16)
{
return GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16)[h ? 1 : 0];
}
else if (floatType == DstFmt.F64)
{
return GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true);
}
throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
}
private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
{
if (floatType == DstFmt.F32)
{
return GetSrcImm(context, imm);
}
else if (floatType == DstFmt.F16)
{
return GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16)[h ? 1 : 0];
}
else if (floatType == DstFmt.F64)
{
return GetSrcImm(context, imm, isFP64: true);
}
throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
}
private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
{
Operand dest = GetDest(rd);
if (type == DstFmt.F32)
{
context.Copy(dest, srcB);
}
else if (type == DstFmt.F16)
{
context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
}
else /* if (type == FPType.FP64) */
{
Operand dest2 = GetDest2(rd);
context.Copy(dest, context.UnpackDouble2x32Low(srcB));
context.Copy(dest2, context.UnpackDouble2x32High(srcB));
}
}
private static Instruction ToInstFPType(this DstFmt type)
{
return type == DstFmt.F64 ? Instruction.FP64 : Instruction.FP32;
}
}
}

View file

@ -0,0 +1,532 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void DaddR(EmitterContext context)
{
InstDaddR op = context.GetOp<InstDaddR>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcB, isFP64: true);
EmitFadd(context, Instruction.FP64, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
public static void DaddI(EmitterContext context)
{
InstDaddI op = context.GetOp<InstDaddI>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
EmitFadd(context, Instruction.FP64, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
public static void DaddC(EmitterContext context)
{
InstDaddC op = context.GetOp<InstDaddC>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
EmitFadd(context, Instruction.FP64, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
public static void DfmaR(EmitterContext context)
{
InstDfmaR op = context.GetOp<InstDfmaR>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcB, isFP64: true);
var srcC = GetSrcReg(context, op.SrcC, isFP64: true);
EmitFfma(context, Instruction.FP64, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
public static void DfmaI(EmitterContext context)
{
InstDfmaI op = context.GetOp<InstDfmaI>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
var srcC = GetSrcReg(context, op.SrcC, isFP64: true);
EmitFfma(context, Instruction.FP64, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
public static void DfmaC(EmitterContext context)
{
InstDfmaC op = context.GetOp<InstDfmaC>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
var srcC = GetSrcReg(context, op.SrcC, isFP64: true);
EmitFfma(context, Instruction.FP64, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
public static void DfmaRc(EmitterContext context)
{
InstDfmaRc op = context.GetOp<InstDfmaRc>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcC, isFP64: true);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
EmitFfma(context, Instruction.FP64, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
}
public static void DmulR(EmitterContext context)
{
InstDmulR op = context.GetOp<InstDmulR>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcB, isFP64: true);
EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, srcA, srcB, op.Dest, op.NegA, false, op.WriteCC);
}
public static void DmulI(EmitterContext context)
{
InstDmulI op = context.GetOp<InstDmulI>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, srcA, srcB, op.Dest, op.NegA, false, op.WriteCC);
}
public static void DmulC(EmitterContext context)
{
InstDmulC op = context.GetOp<InstDmulC>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, srcA, srcB, op.Dest, op.NegA, false, op.WriteCC);
}
public static void FaddR(EmitterContext context)
{
InstFaddR op = context.GetOp<InstFaddR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitFadd(context, Instruction.FP32, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
}
public static void FaddI(EmitterContext context)
{
InstFaddI op = context.GetOp<InstFaddI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
EmitFadd(context, Instruction.FP32, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
}
public static void FaddC(EmitterContext context)
{
InstFaddC op = context.GetOp<InstFaddC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitFadd(context, Instruction.FP32, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
}
public static void Fadd32i(EmitterContext context)
{
InstFadd32i op = context.GetOp<InstFadd32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
EmitFadd(context, Instruction.FP32, srcA, srcB, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
}
public static void FfmaR(EmitterContext context)
{
InstFfmaR op = context.GetOp<InstFfmaR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
}
public static void FfmaI(EmitterContext context)
{
InstFfmaI op = context.GetOp<InstFfmaI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
}
public static void FfmaC(EmitterContext context)
{
InstFfmaC op = context.GetOp<InstFfmaC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
}
public static void FfmaRc(EmitterContext context)
{
InstFfmaRc op = context.GetOp<InstFfmaRc>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcC);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
}
public static void Ffma32i(EmitterContext context)
{
InstFfma32i op = context.GetOp<InstFfma32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
var srcC = GetSrcReg(context, op.Dest);
EmitFfma(context, Instruction.FP32, srcA, srcB, srcC, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
}
public static void FmulR(EmitterContext context)
{
InstFmulR op = context.GetOp<InstFmulR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitFmul(context, Instruction.FP32, op.Scale, srcA, srcB, op.Dest, op.NegA, op.Sat, op.WriteCC);
}
public static void FmulI(EmitterContext context)
{
InstFmulI op = context.GetOp<InstFmulI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
EmitFmul(context, Instruction.FP32, op.Scale, srcA, srcB, op.Dest, op.NegA, op.Sat, op.WriteCC);
}
public static void FmulC(EmitterContext context)
{
InstFmulC op = context.GetOp<InstFmulC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitFmul(context, Instruction.FP32, op.Scale, srcA, srcB, op.Dest, op.NegA, op.Sat, op.WriteCC);
}
public static void Fmul32i(EmitterContext context)
{
InstFmul32i op = context.GetOp<InstFmul32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
EmitFmul(context, Instruction.FP32, MultiplyScale.NoScale, srcA, srcB, op.Dest, false, op.Sat, op.WriteCC);
}
public static void Hadd2R(EmitterContext context)
{
InstHadd2R op = context.GetOp<InstHadd2R>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);
EmitHadd2Hmul2(context, op.OFmt, srcA, srcB, isAdd: true, op.Dest, op.Sat);
}
public static void Hadd2I(EmitterContext context)
{
InstHadd2I op = context.GetOp<InstHadd2I>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BimmH0, op.BimmH1);
EmitHadd2Hmul2(context, op.OFmt, srcA, srcB, isAdd: true, op.Dest, op.Sat);
}
public static void Hadd2C(EmitterContext context)
{
InstHadd2C op = context.GetOp<InstHadd2C>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);
EmitHadd2Hmul2(context, op.OFmt, srcA, srcB, isAdd: true, op.Dest, op.Sat);
}
public static void Hadd232i(EmitterContext context)
{
InstHadd232i op = context.GetOp<InstHadd232i>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, false);
var srcB = GetHalfSrc(context, op.Imm);
EmitHadd2Hmul2(context, OFmt.F16, srcA, srcB, isAdd: true, op.Dest, op.Sat);
}
public static void Hfma2R(EmitterContext context)
{
InstHfma2R op = context.GetOp<InstHfma2R>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
var srcB = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, false);
var srcC = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);
EmitHfma2(context, op.OFmt, srcA, srcB, srcC, op.Dest, op.Sat);
}
public static void Hfma2I(EmitterContext context)
{
InstHfma2I op = context.GetOp<InstHfma2I>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
var srcB = GetHalfSrc(context, op.BimmH0, op.BimmH1);
var srcC = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);
EmitHfma2(context, op.OFmt, srcA, srcB, srcC, op.Dest, op.Sat);
}
public static void Hfma2C(EmitterContext context)
{
InstHfma2C op = context.GetOp<InstHfma2C>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
var srcB = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, false);
var srcC = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);
EmitHfma2(context, op.OFmt, srcA, srcB, srcC, op.Dest, op.Sat);
}
public static void Hfma2Rc(EmitterContext context)
{
InstHfma2Rc op = context.GetOp<InstHfma2Rc>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
var srcB = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegA, false);
var srcC = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegC, false);
EmitHfma2(context, op.OFmt, srcA, srcB, srcC, op.Dest, op.Sat);
}
public static void Hfma232i(EmitterContext context)
{
InstHfma232i op = context.GetOp<InstHfma232i>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
var srcB = GetHalfSrc(context, op.Imm);
var srcC = GetHalfSrc(context, HalfSwizzle.F16, op.Dest, op.NegC, false);
EmitHfma2(context, OFmt.F16, srcA, srcB, srcC, op.Dest, saturate: false);
}
public static void Hmul2R(EmitterContext context)
{
InstHmul2R op = context.GetOp<InstHmul2R>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
var srcB = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, op.AbsB);
EmitHadd2Hmul2(context, op.OFmt, srcA, srcB, isAdd: false, op.Dest, op.Sat);
}
public static void Hmul2I(EmitterContext context)
{
InstHmul2I op = context.GetOp<InstHmul2I>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BimmH0, op.BimmH1);
EmitHadd2Hmul2(context, op.OFmt, srcA, srcB, isAdd: false, op.Dest, op.Sat);
}
public static void Hmul2C(EmitterContext context)
{
InstHmul2C op = context.GetOp<InstHmul2C>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
var srcB = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, op.AbsB);
EmitHadd2Hmul2(context, op.OFmt, srcA, srcB, isAdd: false, op.Dest, op.Sat);
}
public static void Hmul232i(EmitterContext context)
{
InstHmul232i op = context.GetOp<InstHmul232i>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
var srcB = GetHalfSrc(context, op.Imm32);
EmitHadd2Hmul2(context, OFmt.F16, srcA, srcB, isAdd: false, op.Dest, op.Sat);
}
private static void EmitFadd(
EmitterContext context,
Instruction fpType,
Operand srcA,
Operand srcB,
int rd,
bool negateA,
bool negateB,
bool absoluteA,
bool absoluteB,
bool saturate,
bool writeCC)
{
bool isFP64 = fpType == Instruction.FP64;
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand res = context.FPSaturate(context.FPAdd(srcA, srcB, fpType), saturate, fpType);
SetDest(context, res, rd, isFP64);
SetFPZnFlags(context, res, writeCC, fpType);
}
private static void EmitFfma(
EmitterContext context,
Instruction fpType,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool negateB,
bool negateC,
bool saturate,
bool writeCC)
{
bool isFP64 = fpType == Instruction.FP64;
srcB = context.FPNegate(srcB, negateB, fpType);
srcC = context.FPNegate(srcC, negateC, fpType);
Operand res = context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC, fpType), saturate, fpType);
SetDest(context, res, rd, isFP64);
SetFPZnFlags(context, res, writeCC, fpType);
}
private static void EmitFmul(
EmitterContext context,
Instruction fpType,
MultiplyScale scale,
Operand srcA,
Operand srcB,
int rd,
bool negateB,
bool saturate,
bool writeCC)
{
bool isFP64 = fpType == Instruction.FP64;
srcB = context.FPNegate(srcB, negateB, fpType);
if (scale != MultiplyScale.NoScale)
{
Operand scaleConst = scale switch
{
MultiplyScale.D2 => ConstF(0.5f),
MultiplyScale.D4 => ConstF(0.25f),
MultiplyScale.D8 => ConstF(0.125f),
MultiplyScale.M2 => ConstF(2f),
MultiplyScale.M4 => ConstF(4f),
MultiplyScale.M8 => ConstF(8f),
_ => ConstF(1f) // Invalid, behave as if it had no scale.
};
if (scaleConst.AsFloat() == 1f)
{
context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
}
if (isFP64)
{
scaleConst = context.FP32ConvertToFP64(scaleConst);
}
srcA = context.FPMultiply(srcA, scaleConst, fpType);
}
Operand res = context.FPSaturate(context.FPMultiply(srcA, srcB, fpType), saturate, fpType);
SetDest(context, res, rd, isFP64);
SetFPZnFlags(context, res, writeCC, fpType);
}
private static void EmitHadd2Hmul2(
EmitterContext context,
OFmt swizzle,
Operand[] srcA,
Operand[] srcB,
bool isAdd,
int rd,
bool saturate)
{
Operand[] res = new Operand[2];
for (int index = 0; index < res.Length; index++)
{
if (isAdd)
{
res[index] = context.FPAdd(srcA[index], srcB[index]);
}
else
{
res[index] = context.FPMultiply(srcA[index], srcB[index]);
}
res[index] = context.FPSaturate(res[index], saturate);
}
context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, res, rd));
}
public static void EmitHfma2(
EmitterContext context,
OFmt swizzle,
Operand[] srcA,
Operand[] srcB,
Operand[] srcC,
int rd,
bool saturate)
{
Operand[] res = new Operand[2];
for (int index = 0; index < res.Length; index++)
{
res[index] = context.FPFusedMultiplyAdd(srcA[index], srcB[index], srcC[index]);
res[index] = context.FPSaturate(res[index], saturate);
}
context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, res, rd));
}
}
}

View file

@ -0,0 +1,575 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void DsetR(EmitterContext context)
{
InstDsetR op = context.GetOp<InstDsetR>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcB, isFP64: true);
EmitFset(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.Dest,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
op.BVal,
op.WriteCC,
isFP64: true);
}
public static void DsetI(EmitterContext context)
{
InstDsetI op = context.GetOp<InstDsetI>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
EmitFset(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.Dest,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
op.BVal,
op.WriteCC,
isFP64: true);
}
public static void DsetC(EmitterContext context)
{
InstDsetC op = context.GetOp<InstDsetC>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
EmitFset(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.Dest,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
op.BVal,
op.WriteCC,
isFP64: true);
}
public static void DsetpR(EmitterContext context)
{
InstDsetpR op = context.GetOp<InstDsetpR>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcB, isFP64: true);
EmitFsetp(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.DestPred,
op.DestPredInv,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
writeCC: false,
isFP64: true);
}
public static void DsetpI(EmitterContext context)
{
InstDsetpI op = context.GetOp<InstDsetpI>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
EmitFsetp(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.DestPred,
op.DestPredInv,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
writeCC: false,
isFP64: true);
}
public static void DsetpC(EmitterContext context)
{
InstDsetpC op = context.GetOp<InstDsetpC>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
EmitFsetp(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.DestPred,
op.DestPredInv,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
writeCC: false,
isFP64: true);
}
public static void FcmpR(EmitterContext context)
{
InstFcmpR op = context.GetOp<InstFcmpR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitFcmp(context, op.FComp, srcA, srcB, srcC, op.Dest);
}
public static void FcmpI(EmitterContext context)
{
InstFcmpI op = context.GetOp<InstFcmpI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitFcmp(context, op.FComp, srcA, srcB, srcC, op.Dest);
}
public static void FcmpC(EmitterContext context)
{
InstFcmpC op = context.GetOp<InstFcmpC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitFcmp(context, op.FComp, srcA, srcB, srcC, op.Dest);
}
public static void FcmpRc(EmitterContext context)
{
InstFcmpRc op = context.GetOp<InstFcmpRc>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcC);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitFcmp(context, op.FComp, srcA, srcB, srcC, op.Dest);
}
public static void FsetR(EmitterContext context)
{
InstFsetR op = context.GetOp<InstFsetR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitFset(context, op.FComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC);
}
public static void FsetC(EmitterContext context)
{
InstFsetC op = context.GetOp<InstFsetC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitFset(context, op.FComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC);
}
public static void FsetI(EmitterContext context)
{
InstFsetI op = context.GetOp<InstFsetI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
EmitFset(context, op.FComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC);
}
public static void FsetpR(EmitterContext context)
{
InstFsetpR op = context.GetOp<InstFsetpR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitFsetp(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.DestPred,
op.DestPredInv,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
op.WriteCC);
}
public static void FsetpI(EmitterContext context)
{
InstFsetpI op = context.GetOp<InstFsetpI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
EmitFsetp(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.DestPred,
op.DestPredInv,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
op.WriteCC);
}
public static void FsetpC(EmitterContext context)
{
InstFsetpC op = context.GetOp<InstFsetpC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitFsetp(
context,
op.FComp,
op.Bop,
srcA,
srcB,
op.SrcPred,
op.SrcPredInv,
op.DestPred,
op.DestPredInv,
op.AbsA,
op.AbsB,
op.NegA,
op.NegB,
op.WriteCC);
}
public static void Hset2R(EmitterContext context)
{
InstHset2R op = context.GetOp<InstHset2R>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);
EmitHset2(context, op.Cmp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
}
public static void Hset2I(EmitterContext context)
{
InstHset2I op = context.GetOp<InstHset2I>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BimmH0, op.BimmH1);
EmitHset2(context, op.Cmp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
}
public static void Hset2C(EmitterContext context)
{
InstHset2C op = context.GetOp<InstHset2C>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, false);
EmitHset2(context, op.Cmp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
}
public static void Hsetp2R(EmitterContext context)
{
InstHsetp2R op = context.GetOp<InstHsetp2R>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);
EmitHsetp2(context, op.FComp2, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
}
public static void Hsetp2I(EmitterContext context)
{
InstHsetp2I op = context.GetOp<InstHsetp2I>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, op.BimmH0, op.BimmH1);
EmitHsetp2(context, op.FComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
}
public static void Hsetp2C(EmitterContext context)
{
InstHsetp2C op = context.GetOp<InstHsetp2C>();
var srcA = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
var srcB = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);
EmitHsetp2(context, op.FComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
}
private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
{
Operand cmpRes = GetFPComparison(context, cmpOp, srcC, ConstF(0));
Operand res = context.ConditionalSelect(cmpRes, srcA, srcB);
context.Copy(GetDest(rd), res);
}
private static void EmitFset(
EmitterContext context,
FComp cmpOp,
BoolOp logicOp,
Operand srcA,
Operand srcB,
int srcPred,
bool srcPredInv,
int rd,
bool absoluteA,
bool absoluteB,
bool negateA,
bool negateB,
bool boolFloat,
bool writeCC,
bool isFP64 = false)
{
Instruction fpType = isFP64 ? Instruction.FP64 : Instruction.FP32;
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand res = GetFPComparison(context, cmpOp, srcA, srcB, fpType);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
res = GetPredLogicalOp(context, logicOp, res, pred);
Operand dest = GetDest(rd);
if (boolFloat)
{
res = context.ConditionalSelect(res, ConstF(1), Const(0));
context.Copy(dest, res);
SetFPZnFlags(context, res, writeCC);
}
else
{
context.Copy(dest, res);
SetZnFlags(context, res, writeCC, extended: false);
}
}
private static void EmitFsetp(
EmitterContext context,
FComp cmpOp,
BoolOp logicOp,
Operand srcA,
Operand srcB,
int srcPred,
bool srcPredInv,
int destPred,
int destPredInv,
bool absoluteA,
bool absoluteB,
bool negateA,
bool negateB,
bool writeCC,
bool isFP64 = false)
{
Instruction fpType = isFP64 ? Instruction.FP64 : Instruction.FP32;
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand p0Res = GetFPComparison(context, cmpOp, srcA, srcB, fpType);
Operand p1Res = context.BitwiseNot(p0Res);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
p0Res = GetPredLogicalOp(context, logicOp, p0Res, pred);
p1Res = GetPredLogicalOp(context, logicOp, p1Res, pred);
context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
}
private static void EmitHset2(
EmitterContext context,
FComp cmpOp,
BoolOp logicOp,
Operand[] srcA,
Operand[] srcB,
int srcPred,
bool srcPredInv,
int rd,
bool boolFloat)
{
Operand[] res = new Operand[2];
res[0] = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
res[1] = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
res[0] = GetPredLogicalOp(context, logicOp, res[0], pred);
res[1] = GetPredLogicalOp(context, logicOp, res[1], pred);
if (boolFloat)
{
res[0] = context.ConditionalSelect(res[0], ConstF(1), Const(0));
res[1] = context.ConditionalSelect(res[1], ConstF(1), Const(0));
context.Copy(GetDest(rd), context.PackHalf2x16(res[0], res[1]));
}
else
{
Operand low = context.BitwiseAnd(res[0], Const(0xffff));
Operand high = context.ShiftLeft (res[1], Const(16));
Operand packed = context.BitwiseOr(low, high);
context.Copy(GetDest(rd), packed);
}
}
private static void EmitHsetp2(
EmitterContext context,
FComp cmpOp,
BoolOp logicOp,
Operand[] srcA,
Operand[] srcB,
int srcPred,
bool srcPredInv,
int destPred,
int destPredInv,
bool hAnd)
{
Operand p0Res = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
Operand p1Res = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);
if (hAnd)
{
p0Res = context.BitwiseAnd(p0Res, p1Res);
p1Res = context.BitwiseNot(p0Res);
}
Operand pred = GetPredicate(context, srcPred, srcPredInv);
p0Res = GetPredLogicalOp(context, logicOp, p0Res, pred);
p1Res = GetPredLogicalOp(context, logicOp, p1Res, pred);
context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
}
private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
{
Operand res;
if (cond == FComp.T)
{
res = Const(IrConsts.True);
}
else if (cond == FComp.F)
{
res = Const(IrConsts.False);
}
else if (cond == FComp.Nan || cond == FComp.Num)
{
res = context.BitwiseOr(context.IsNan(srcA, fpType), context.IsNan(srcB, fpType));
if (cond == FComp.Num)
{
res = context.BitwiseNot(res);
}
}
else
{
Instruction inst;
switch (cond & ~FComp.Nan)
{
case FComp.Lt: inst = Instruction.CompareLess; break;
case FComp.Eq: inst = Instruction.CompareEqual; break;
case FComp.Le: inst = Instruction.CompareLessOrEqual; break;
case FComp.Gt: inst = Instruction.CompareGreater; break;
case FComp.Ne: inst = Instruction.CompareNotEqual; break;
case FComp.Ge: inst = Instruction.CompareGreaterOrEqual; break;
default: throw new ArgumentException($"Unexpected condition \"{cond}\".");
}
res = context.Add(inst | fpType, Local(), srcA, srcB);
if ((cond & FComp.Nan) != 0)
{
res = context.BitwiseOr(res, context.IsNan(srcA, fpType));
res = context.BitwiseOr(res, context.IsNan(srcB, fpType));
}
}
return res;
}
}
}

View file

@ -0,0 +1,106 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void DmnmxR(EmitterContext context)
{
InstDmnmxR op = context.GetOp<InstDmnmxR>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcReg(context, op.SrcB, isFP64: true);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitFmnmx(context, srcA, srcB, srcPred, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
}
public static void DmnmxI(EmitterContext context)
{
InstDmnmxI op = context.GetOp<InstDmnmxI>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitFmnmx(context, srcA, srcB, srcPred, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
}
public static void DmnmxC(EmitterContext context)
{
InstDmnmxC op = context.GetOp<InstDmnmxC>();
var srcA = GetSrcReg(context, op.SrcA, isFP64: true);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitFmnmx(context, srcA, srcB, srcPred, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
}
public static void FmnmxR(EmitterContext context)
{
InstFmnmxR op = context.GetOp<InstFmnmxR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitFmnmx(context, srcA, srcB, srcPred, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
}
public static void FmnmxI(EmitterContext context)
{
InstFmnmxI op = context.GetOp<InstFmnmxI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitFmnmx(context, srcA, srcB, srcPred, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
}
public static void FmnmxC(EmitterContext context)
{
InstFmnmxC op = context.GetOp<InstFmnmxC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitFmnmx(context, srcA, srcB, srcPred, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
}
private static void EmitFmnmx(
EmitterContext context,
Operand srcA,
Operand srcB,
Operand srcPred,
int rd,
bool absoluteA,
bool absoluteB,
bool negateA,
bool negateB,
bool writeCC,
bool isFP64 = false)
{
Instruction fpType = isFP64 ? Instruction.FP64 : Instruction.FP32;
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand resMin = context.FPMinimum(srcA, srcB, fpType);
Operand resMax = context.FPMaximum(srcA, srcB, fpType);
Operand res = context.ConditionalSelect(srcPred, resMin, resMax);
SetDest(context, res, rd, isFP64);
SetFPZnFlags(context, res, writeCC, fpType);
}
}
}

View file

@ -0,0 +1,322 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using System.Linq;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Bra(EmitterContext context)
{
InstBra op = context.GetOp<InstBra>();
EmitBranch(context, context.CurrBlock.Successors[^1].Address);
}
public static void Brk(EmitterContext context)
{
InstBrk op = context.GetOp<InstBrk>();
EmitBrkContSync(context);
}
public static void Brx(EmitterContext context)
{
InstBrx op = context.GetOp<InstBrx>();
InstOp currOp = context.CurrOp;
int startIndex = context.CurrBlock.HasNext() ? 1 : 0;
if (context.CurrBlock.Successors.Count <= startIndex)
{
context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
return;
}
int offset = (int)currOp.GetAbsoluteAddress();
Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));
var targets = context.CurrBlock.Successors.Skip(startIndex);
bool allTargetsSinglePred = true;
int total = context.CurrBlock.Successors.Count - startIndex;
int count = 0;
foreach (var target in targets.OrderBy(x => x.Address))
{
if (++count < total && (target.Predecessors.Count > 1 || target.Address <= context.CurrBlock.Address))
{
allTargetsSinglePred = false;
break;
}
}
if (allTargetsSinglePred)
{
// Chain blocks, each target block will check if the BRX target address
// matches its own address, if not, it jumps to the next target which will do the same check,
// until it reaches the last possible target, which executed unconditionally.
// We can only do this if the BRX block is the only predecessor of all target blocks.
// Additionally, this is not supported for blocks located before the current block,
// since it will be too late to insert a label, but this is something that can be improved
// in the future if necessary.
var sortedTargets = targets.OrderBy(x => x.Address);
Block currentTarget = null;
ulong firstTargetAddress = 0;
foreach (Block nextTarget in sortedTargets)
{
if (currentTarget != null)
{
if (currentTarget.Address != nextTarget.Address)
{
context.SetBrxTarget(currentTarget.Address, address, (int)currentTarget.Address, nextTarget.Address);
}
}
else
{
firstTargetAddress = nextTarget.Address;
}
currentTarget = nextTarget;
}
context.Branch(context.GetLabel(firstTargetAddress));
}
else
{
// Emit the branches sequentially.
// This generates slightly worse code, but should work for all cases.
var sortedTargets = targets.OrderByDescending(x => x.Address);
ulong lastTargetAddress = ulong.MaxValue;
count = 0;
foreach (Block target in sortedTargets)
{
Operand label = context.GetLabel(target.Address);
if (++count < total)
{
if (target.Address != lastTargetAddress)
{
context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)target.Address)));
}
lastTargetAddress = target.Address;
}
else
{
context.Branch(label);
}
}
}
}
public static void Cal(EmitterContext context)
{
InstCal op = context.GetOp<InstCal>();
DecodedFunction function = context.Program.GetFunctionByAddress(context.CurrOp.GetAbsoluteAddress());
if (function.IsCompilerGenerated)
{
switch (function.Type)
{
case FunctionType.BuiltInFSIBegin:
context.FSIBegin();
break;
case FunctionType.BuiltInFSIEnd:
context.FSIEnd();
break;
}
}
else
{
context.Call(function.Id, false);
}
}
public static void Cont(EmitterContext context)
{
InstCont op = context.GetOp<InstCont>();
EmitBrkContSync(context);
}
public static void Exit(EmitterContext context)
{
InstExit op = context.GetOp<InstExit>();
if (context.IsNonMain)
{
context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
return;
}
if (op.Ccc == Ccc.T)
{
context.Return();
}
else
{
Operand cond = GetCondition(context, op.Ccc, IrConsts.False);
// If the condition is always false, we don't need to do anything.
if (cond.Type != OperandType.Constant || cond.Value != IrConsts.False)
{
Operand lblSkip = Label();
context.BranchIfFalse(lblSkip, cond);
context.Return();
context.MarkLabel(lblSkip);
}
}
}
public static void Kil(EmitterContext context)
{
InstKil op = context.GetOp<InstKil>();
context.Discard();
}
public static void Pbk(EmitterContext context)
{
InstPbk op = context.GetOp<InstPbk>();
EmitPbkPcntSsy(context);
}
public static void Pcnt(EmitterContext context)
{
InstPcnt op = context.GetOp<InstPcnt>();
EmitPbkPcntSsy(context);
}
public static void Ret(EmitterContext context)
{
InstRet op = context.GetOp<InstRet>();
if (context.IsNonMain)
{
context.Return();
}
else
{
context.Config.GpuAccessor.Log("Invalid return on main function.");
}
}
public static void Ssy(EmitterContext context)
{
InstSsy op = context.GetOp<InstSsy>();
EmitPbkPcntSsy(context);
}
public static void Sync(EmitterContext context)
{
InstSync op = context.GetOp<InstSync>();
EmitBrkContSync(context);
}
private static void EmitPbkPcntSsy(EmitterContext context)
{
var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address).Consumers;
foreach (KeyValuePair<Block, Operand> kv in consumers)
{
Block consumerBlock = kv.Key;
Operand local = kv.Value;
int id = consumerBlock.SyncTargets[context.CurrOp.Address].PushOpId;
context.Copy(local, Const(id));
}
}
private static void EmitBrkContSync(EmitterContext context)
{
var targets = context.CurrBlock.SyncTargets;
if (targets.Count == 1)
{
// If we have only one target, then the SSY/PBK is basically
// a branch, we can produce better codegen for this case.
EmitBranch(context, targets.Values.First().PushOpInfo.Op.GetAbsoluteAddress());
}
else
{
// TODO: Support CC here as well (condition).
foreach (SyncTarget target in targets.Values)
{
PushOpInfo pushOpInfo = target.PushOpInfo;
Operand label = context.GetLabel(pushOpInfo.Op.GetAbsoluteAddress());
Operand local = pushOpInfo.Consumers[context.CurrBlock];
context.BranchIfTrue(label, context.ICompareEqual(local, Const(target.PushOpId)));
}
}
}
private static void EmitBranch(EmitterContext context, ulong address)
{
InstOp op = context.CurrOp;
InstConditional opCond = new InstConditional(op.RawOpCode);
// If we're branching to the next instruction, then the branch
// is useless and we can ignore it.
if (address == op.Address + 8)
{
return;
}
Operand label = context.GetLabel(address);
Operand pred = Register(opCond.Pred, RegisterType.Predicate);
if (opCond.Ccc != Ccc.T)
{
Operand cond = GetCondition(context, opCond.Ccc);
if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
{
pred = cond;
}
else if (opCond.PredInv)
{
pred = context.BitwiseAnd(context.BitwiseNot(pred), cond);
}
else
{
pred = context.BitwiseAnd(pred, cond);
}
context.BranchIfTrue(label, pred);
}
else if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
{
context.Branch(label);
}
else if (opCond.PredInv)
{
context.BranchIfFalse(label, pred);
}
else
{
context.BranchIfTrue(label, pred);
}
}
}
}

View file

@ -0,0 +1,266 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.CompilerServices;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class InstEmitHelper
{
public static Operand GetZF()
{
return Register(0, RegisterType.Flag);
}
public static Operand GetNF()
{
return Register(1, RegisterType.Flag);
}
public static Operand GetCF()
{
return Register(2, RegisterType.Flag);
}
public static Operand GetVF()
{
return Register(3, RegisterType.Flag);
}
public static Operand GetDest(int rd)
{
return Register(rd, RegisterType.Gpr);
}
public static Operand GetDest2(int rd)
{
return Register(rd | 1, RegisterType.Gpr);
}
public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
{
if (isFP64)
{
return context.PackDouble2x32(
Cbuf(cbufSlot, cbufOffset),
Cbuf(cbufSlot, cbufOffset + 1));
}
else
{
return Cbuf(cbufSlot, cbufOffset);
}
}
public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
{
if (isFP64)
{
return context.PackDouble2x32(Const(0), Const(imm));
}
else
{
return Const(imm);
}
}
public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
{
if (isFP64)
{
return context.PackDouble2x32(Register(reg, RegisterType.Gpr), Register(reg | 1, RegisterType.Gpr));
}
else
{
return Register(reg, RegisterType.Gpr);
}
}
public static Operand[] GetHalfSrc(
EmitterContext context,
HalfSwizzle swizzle,
int ra,
bool negate,
bool absolute)
{
Operand[] operands = GetHalfUnpacked(context, GetSrcReg(context, ra), swizzle);
return FPAbsNeg(context, operands, absolute, negate);
}
public static Operand[] GetHalfSrc(
EmitterContext context,
HalfSwizzle swizzle,
int cbufSlot,
int cbufOffset,
bool negate,
bool absolute)
{
Operand[] operands = GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), swizzle);
return FPAbsNeg(context, operands, absolute, negate);
}
public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
{
ushort low = (ushort)(immH0 << 6);
ushort high = (ushort)(immH1 << 6);
return new Operand[]
{
ConstF((float)Unsafe.As<ushort, Half>(ref low)),
ConstF((float)Unsafe.As<ushort, Half>(ref high))
};
}
public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
{
ushort low = (ushort)imm32;
ushort high = (ushort)(imm32 >> 16);
return new Operand[]
{
ConstF((float)Unsafe.As<ushort, Half>(ref low)),
ConstF((float)Unsafe.As<ushort, Half>(ref high))
};
}
public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
{
for (int index = 0; index < operands.Length; index++)
{
operands[index] = context.FPAbsNeg(operands[index], abs, neg);
}
return operands;
}
public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
{
switch (swizzle)
{
case HalfSwizzle.F16:
return new Operand[]
{
context.UnpackHalf2x16Low (src),
context.UnpackHalf2x16High(src)
};
case HalfSwizzle.F32: return new Operand[] { src, src };
case HalfSwizzle.H0H0:
return new Operand[]
{
context.UnpackHalf2x16Low(src),
context.UnpackHalf2x16Low(src)
};
case HalfSwizzle.H1H1:
return new Operand[]
{
context.UnpackHalf2x16High(src),
context.UnpackHalf2x16High(src)
};
}
throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
}
public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
{
switch (swizzle)
{
case OFmt.F16: return context.PackHalf2x16(results[0], results[1]);
case OFmt.F32: return results[0];
case OFmt.MrgH0:
{
Operand h1 = GetHalfDest(context, rd, isHigh: true);
return context.PackHalf2x16(results[0], h1);
}
case OFmt.MrgH1:
{
Operand h0 = GetHalfDest(context, rd, isHigh: false);
return context.PackHalf2x16(h0, results[1]);
}
}
throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
}
public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
{
if (isHigh)
{
return context.UnpackHalf2x16High(GetDest(rd));
}
else
{
return context.UnpackHalf2x16Low(GetDest(rd));
}
}
public static Operand GetPredicate(EmitterContext context, int pred, bool not)
{
Operand local = Register(pred, RegisterType.Predicate);
if (not)
{
local = context.BitwiseNot(local);
}
return local;
}
public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
{
if (isFP64)
{
context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
}
else
{
context.Copy(GetDest(rd), value);
}
}
public static int Imm16ToSInt(int imm16)
{
return (short)imm16;
}
public static int Imm20ToFloat(int imm20)
{
return imm20 << 12;
}
public static int Imm20ToSInt(int imm20)
{
return (imm20 << 12) >> 12;
}
public static int Imm24ToSInt(int imm24)
{
return (imm24 << 8) >> 8;
}
public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
{
return context.BitfieldExtractS32(src, Const(0), Const(srcBits));
}
public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
{
int mask = (int)(uint.MaxValue >> (32 - srcBits));
return context.BitwiseAnd(src, Const(mask));
}
}
}

View file

@ -0,0 +1,699 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void IaddR(EmitterContext context)
{
InstIaddR op = context.GetOp<InstIaddR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
}
public static void IaddI(EmitterContext context)
{
InstIaddI op = context.GetOp<InstIaddI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
}
public static void IaddC(EmitterContext context)
{
InstIaddC op = context.GetOp<InstIaddC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
}
public static void Iadd32i(EmitterContext context)
{
InstIadd32i op = context.GetOp<InstIadd32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
}
public static void Iadd3R(EmitterContext context)
{
InstIadd3R op = context.GetOp<InstIadd3R>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitIadd3(context, op.Lrs, srcA, srcB, srcC, op.Apart, op.Bpart, op.Cpart, op.Dest, op.NegA, op.NegB, op.NegC);
}
public static void Iadd3I(EmitterContext context)
{
InstIadd3I op = context.GetOp<InstIadd3I>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitIadd3(context, Lrs.None, srcA, srcB, srcC, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
}
public static void Iadd3C(EmitterContext context)
{
InstIadd3C op = context.GetOp<InstIadd3C>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitIadd3(context, Lrs.None, srcA, srcB, srcC, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
}
public static void ImadR(EmitterContext context)
{
InstImadR op = context.GetOp<InstImadR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}
public static void ImadI(EmitterContext context)
{
InstImadI op = context.GetOp<InstImadI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}
public static void ImadC(EmitterContext context)
{
InstImadC op = context.GetOp<InstImadC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}
public static void ImadRc(EmitterContext context)
{
InstImadRc op = context.GetOp<InstImadRc>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcC);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}
public static void Imad32i(EmitterContext context)
{
InstImad32i op = context.GetOp<InstImad32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
var srcC = GetSrcReg(context, op.Dest);
EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
}
public static void ImulR(EmitterContext context)
{
InstImulR op = context.GetOp<InstImulR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}
public static void ImulI(EmitterContext context)
{
InstImulI op = context.GetOp<InstImulI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}
public static void ImulC(EmitterContext context)
{
InstImulC op = context.GetOp<InstImulC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}
public static void Imul32i(EmitterContext context)
{
InstImul32i op = context.GetOp<InstImul32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
}
public static void IscaddR(EmitterContext context)
{
InstIscaddR op = context.GetOp<InstIscaddR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
}
public static void IscaddI(EmitterContext context)
{
InstIscaddI op = context.GetOp<InstIscaddI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
}
public static void IscaddC(EmitterContext context)
{
InstIscaddC op = context.GetOp<InstIscaddC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
}
public static void Iscadd32i(EmitterContext context)
{
InstIscadd32i op = context.GetOp<InstIscadd32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, AvgMode.NoNeg, op.WriteCC);
}
public static void LeaR(EmitterContext context)
{
InstLeaR op = context.GetOp<InstLeaR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
}
public static void LeaI(EmitterContext context)
{
InstLeaI op = context.GetOp<InstLeaI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
}
public static void LeaC(EmitterContext context)
{
InstLeaC op = context.GetOp<InstLeaC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
}
public static void LeaHiR(EmitterContext context)
{
InstLeaHiR op = context.GetOp<InstLeaHiR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitLeaHi(context, srcA, srcB, srcC, op.Dest, op.NegA, op.ImmU5);
}
public static void LeaHiC(EmitterContext context)
{
InstLeaHiC op = context.GetOp<InstLeaHiC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitLeaHi(context, srcA, srcB, srcC, op.Dest, op.NegA, op.ImmU5);
}
public static void XmadR(EmitterContext context)
{
InstXmadR op = context.GetOp<InstXmadR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
}
public static void XmadI(EmitterContext context)
{
InstXmadI op = context.GetOp<InstXmadI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm16);
var srcC = GetSrcReg(context, op.SrcC);
EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, false, op.Psl, op.Mrg, op.X, op.WriteCC);
}
public static void XmadC(EmitterContext context)
{
InstXmadC op = context.GetOp<InstXmadC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
}
public static void XmadRc(EmitterContext context)
{
InstXmadRc op = context.GetOp<InstXmadRc>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcC);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, false, false, op.X, op.WriteCC);
}
private static void EmitIadd(
EmitterContext context,
Operand srcA,
Operand srcB,
int rd,
AvgMode avgMode,
bool extended,
bool writeCC)
{
srcA = context.INegate(srcA, avgMode == AvgMode.NegA);
srcB = context.INegate(srcB, avgMode == AvgMode.NegB);
Operand res = context.IAdd(srcA, srcB);
if (extended)
{
res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
}
SetIaddFlags(context, res, srcA, srcB, writeCC, extended);
// TODO: SAT.
context.Copy(GetDest(rd), res);
}
private static void EmitIadd3(
EmitterContext context,
Lrs mode,
Operand srcA,
Operand srcB,
Operand srcC,
HalfSelect partA,
HalfSelect partB,
HalfSelect partC,
int rd,
bool negateA,
bool negateB,
bool negateC)
{
Operand Extend(Operand src, HalfSelect part)
{
if (part == HalfSelect.B32)
{
return src;
}
if (part == HalfSelect.H0)
{
return context.BitwiseAnd(src, Const(0xffff));
}
else if (part == HalfSelect.H1)
{
return context.ShiftRightU32(src, Const(16));
}
else
{
context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
}
return src;
}
srcA = context.INegate(Extend(srcA, partA), negateA);
srcB = context.INegate(Extend(srcB, partB), negateB);
srcC = context.INegate(Extend(srcC, partC), negateC);
Operand res = context.IAdd(srcA, srcB);
if (mode != Lrs.None)
{
if (mode == Lrs.LeftShift)
{
res = context.ShiftLeft(res, Const(16));
}
else if (mode == Lrs.RightShift)
{
res = context.ShiftRightU32(res, Const(16));
}
else
{
// TODO: Warning.
}
}
res = context.IAdd(res, srcC);
context.Copy(GetDest(rd), res);
// TODO: CC, X, corner cases.
}
private static void EmitImad(
EmitterContext context,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
AvgMode avgMode,
bool signedA,
bool signedB,
bool high)
{
srcB = context.INegate(srcB, avgMode == AvgMode.NegA);
srcC = context.INegate(srcC, avgMode == AvgMode.NegB);
Operand res;
if (high)
{
if (signedA && signedB)
{
res = context.MultiplyHighS32(srcA, srcB);
}
else
{
res = context.MultiplyHighU32(srcA, srcB);
if (signedA)
{
res = context.IAdd(res, context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31))));
}
else if (signedB)
{
res = context.IAdd(res, context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31))));
}
}
}
else
{
res = context.IMultiply(srcA, srcB);
}
if (srcC.Type != OperandType.Constant || srcC.Value != 0)
{
res = context.IAdd(res, srcC);
}
// TODO: CC, X, SAT, and more?
context.Copy(GetDest(rd), res);
}
private static void EmitIscadd(
EmitterContext context,
Operand srcA,
Operand srcB,
int rd,
int shift,
AvgMode avgMode,
bool writeCC)
{
srcA = context.ShiftLeft(srcA, Const(shift));
srcA = context.INegate(srcA, avgMode == AvgMode.NegA);
srcB = context.INegate(srcB, avgMode == AvgMode.NegB);
Operand res = context.IAdd(srcA, srcB);
SetIaddFlags(context, res, srcA, srcB, writeCC, false);
context.Copy(GetDest(rd), res);
}
public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
{
srcA = context.ShiftLeft(srcA, Const(shift));
srcA = context.INegate(srcA, negateA);
Operand res = context.IAdd(srcA, srcB);
context.Copy(GetDest(rd), res);
// TODO: CC, X.
}
private static void EmitLeaHi(
EmitterContext context,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool negateA,
int shift)
{
Operand aLow = context.ShiftLeft(srcA, Const(shift));
Operand aHigh = shift == 0 ? Const(0) : context.ShiftRightU32(srcA, Const(32 - shift));
aHigh = context.BitwiseOr(aHigh, context.ShiftLeft(srcC, Const(shift)));
if (negateA)
{
// Perform 64-bit negation by doing bitwise not of the value,
// then adding 1 and carrying over from low to high.
aLow = context.BitwiseNot(aLow);
aHigh = context.BitwiseNot(aHigh);
aLow = AddWithCarry(context, aLow, Const(1), out Operand aLowCOut);
aHigh = context.IAdd(aHigh, aLowCOut);
}
Operand res = context.IAdd(aHigh, srcB);
context.Copy(GetDest(rd), res);
// TODO: CC, X.
}
public static void EmitXmad(
EmitterContext context,
XmadCop2 mode,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool signedA,
bool signedB,
bool highA,
bool highB,
bool productShiftLeft,
bool merge,
bool extended,
bool writeCC)
{
XmadCop modeConv;
switch (mode)
{
case XmadCop2.Cfull:
modeConv = XmadCop.Cfull;
break;
case XmadCop2.Clo:
modeConv = XmadCop.Clo;
break;
case XmadCop2.Chi:
modeConv = XmadCop.Chi;
break;
case XmadCop2.Csfu:
modeConv = XmadCop.Csfu;
break;
default:
context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
return;
}
EmitXmad(context, modeConv, srcA, srcB, srcC, rd, signedA, signedB, highA, highB, productShiftLeft, merge, extended, writeCC);
}
public static void EmitXmad(
EmitterContext context,
XmadCop mode,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool signedA,
bool signedB,
bool highA,
bool highB,
bool productShiftLeft,
bool merge,
bool extended,
bool writeCC)
{
var srcBUnmodified = srcB;
Operand Extend16To32(Operand src, bool high, bool signed)
{
if (signed && high)
{
return context.ShiftRightS32(src, Const(16));
}
else if (signed)
{
return context.BitfieldExtractS32(src, Const(0), Const(16));
}
else if (high)
{
return context.ShiftRightU32(src, Const(16));
}
else
{
return context.BitwiseAnd(src, Const(0xffff));
}
}
srcA = Extend16To32(srcA, highA, signedA);
srcB = Extend16To32(srcB, highB, signedB);
Operand res = context.IMultiply(srcA, srcB);
if (productShiftLeft)
{
res = context.ShiftLeft(res, Const(16));
}
switch (mode)
{
case XmadCop.Cfull:
break;
case XmadCop.Clo:
srcC = Extend16To32(srcC, high: false, signed: false);
break;
case XmadCop.Chi:
srcC = Extend16To32(srcC, high: true, signed: false);
break;
case XmadCop.Cbcc:
srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
break;
case XmadCop.Csfu:
Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));
srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
break;
default:
context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
return;
}
Operand product = res;
if (extended)
{
// Add with carry.
res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
}
else
{
// Add (no carry in).
res = context.IAdd(res, srcC);
}
SetIaddFlags(context, res, product, srcC, writeCC, extended);
if (merge)
{
res = context.BitwiseAnd(res, Const(0xffff));
res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
}
context.Copy(GetDest(rd), res);
}
private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
{
if (!setCC)
{
return;
}
if (extended)
{
// C = (d == a && CIn) || d < a
Operand tempC0 = context.ICompareEqual(res, srcA);
Operand tempC1 = context.ICompareLessUnsigned(res, srcA);
tempC0 = context.BitwiseAnd(tempC0, GetCF());
context.Copy(GetCF(), context.BitwiseOr(tempC0, tempC1));
}
else
{
// C = d < a
context.Copy(GetCF(), context.ICompareLessUnsigned(res, srcA));
}
// V = (d ^ a) & ~(a ^ b) < 0
Operand tempV0 = context.BitwiseExclusiveOr(res, srcA);
Operand tempV1 = context.BitwiseExclusiveOr(srcA, srcB);
tempV1 = context.BitwiseNot(tempV1);
Operand tempV = context.BitwiseAnd(tempV0, tempV1);
context.Copy(GetVF(), context.ICompareLess(tempV, Const(0)));
SetZnFlags(context, res, setCC: true, extended: extended);
}
}
}

View file

@ -0,0 +1,310 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void IcmpR(EmitterContext context)
{
InstIcmpR op = context.GetOp<InstIcmpR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
}
public static void IcmpI(EmitterContext context)
{
InstIcmpI op = context.GetOp<InstIcmpI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
}
public static void IcmpC(EmitterContext context)
{
InstIcmpC op = context.GetOp<InstIcmpC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
}
public static void IcmpRc(EmitterContext context)
{
InstIcmpRc op = context.GetOp<InstIcmpRc>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcC);
var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
}
public static void IsetR(EmitterContext context)
{
InstIsetR op = context.GetOp<InstIsetR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
}
public static void IsetI(EmitterContext context)
{
InstIsetI op = context.GetOp<InstIsetI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
}
public static void IsetC(EmitterContext context)
{
InstIsetC op = context.GetOp<InstIsetC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
}
public static void IsetpR(EmitterContext context)
{
InstIsetpR op = context.GetOp<InstIsetpR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
}
public static void IsetpI(EmitterContext context)
{
InstIsetpI op = context.GetOp<InstIsetpI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
}
public static void IsetpC(EmitterContext context)
{
InstIsetpC op = context.GetOp<InstIsetpC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
}
private static void EmitIcmp(
EmitterContext context,
IComp cmpOp,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool isSigned)
{
Operand cmpRes = GetIntComparison(context, cmpOp, srcC, Const(0), isSigned);
Operand res = context.ConditionalSelect(cmpRes, srcA, srcB);
context.Copy(GetDest(rd), res);
}
private static void EmitIset(
EmitterContext context,
IComp cmpOp,
BoolOp logicOp,
Operand srcA,
Operand srcB,
int srcPred,
bool srcPredInv,
int rd,
bool boolFloat,
bool isSigned,
bool extended,
bool writeCC)
{
Operand res = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
res = GetPredLogicalOp(context, logicOp, res, pred);
Operand dest = GetDest(rd);
if (boolFloat)
{
res = context.ConditionalSelect(res, ConstF(1), Const(0));
context.Copy(dest, res);
SetFPZnFlags(context, res, writeCC);
}
else
{
context.Copy(dest, res);
SetZnFlags(context, res, writeCC, extended);
}
}
private static void EmitIsetp(
EmitterContext context,
IComp cmpOp,
BoolOp logicOp,
Operand srcA,
Operand srcB,
int srcPred,
bool srcPredInv,
int destPred,
int destPredInv,
bool isSigned,
bool extended)
{
Operand p0Res = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
Operand p1Res = context.BitwiseNot(p0Res);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
p0Res = GetPredLogicalOp(context, logicOp, p0Res, pred);
p1Res = GetPredLogicalOp(context, logicOp, p1Res, pred);
context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
}
private static Operand GetIntComparison(
EmitterContext context,
IComp cond,
Operand srcA,
Operand srcB,
bool isSigned,
bool extended)
{
return extended
? GetIntComparisonExtended(context, cond, srcA, srcB, isSigned)
: GetIntComparison(context, cond, srcA, srcB, isSigned);
}
private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
{
Operand res;
if (cond == IComp.T)
{
res = Const(IrConsts.True);
}
else if (cond == IComp.F)
{
res = Const(IrConsts.False);
}
else
{
res = context.ISubtract(srcA, srcB);
res = context.IAdd(res, context.BitwiseNot(GetCF()));
switch (cond)
{
case IComp.Eq: // r = xh == yh && xl == yl
res = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());
break;
case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
Operand notC = context.BitwiseNot(GetCF());
Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
res = isSigned
? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
: context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);
break;
case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
res = isSigned
? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
: context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);
break;
case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
res = isSigned
? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
: context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);
break;
case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
res = isSigned
? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
: context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);
break;
case IComp.Ne: // r = xh != yh || xl != yl
res = context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));
break;
default:
throw new ArgumentException($"Unexpected condition \"{cond}\".");
}
}
return res;
}
private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
{
Operand res;
if (cond == IComp.T)
{
res = Const(IrConsts.True);
}
else if (cond == IComp.F)
{
res = Const(IrConsts.False);
}
else
{
var inst = cond switch
{
IComp.Lt => Instruction.CompareLessU32,
IComp.Eq => Instruction.CompareEqual,
IComp.Le => Instruction.CompareLessOrEqualU32,
IComp.Gt => Instruction.CompareGreaterU32,
IComp.Ne => Instruction.CompareNotEqual,
IComp.Ge => Instruction.CompareGreaterOrEqualU32,
_ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
};
if (isSigned)
{
switch (cond)
{
case IComp.Lt: inst = Instruction.CompareLess; break;
case IComp.Le: inst = Instruction.CompareLessOrEqual; break;
case IComp.Gt: inst = Instruction.CompareGreater; break;
case IComp.Ge: inst = Instruction.CompareGreaterOrEqual; break;
}
}
res = context.Add(inst, Local(), srcA, srcB);
}
return res;
}
}
}

View file

@ -0,0 +1,167 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
private const int PT = RegisterConsts.PredicateTrueIndex;
public static void LopR(EmitterContext context)
{
InstLopR op = context.GetOp<InstLopR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitLop(context, op.Lop, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
}
public static void LopI(EmitterContext context)
{
InstLopI op = context.GetOp<InstLopI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitLop(context, op.LogicOp, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
}
public static void LopC(EmitterContext context)
{
InstLopC op = context.GetOp<InstLopC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitLop(context, op.LogicOp, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
}
public static void Lop32i(EmitterContext context)
{
InstLop32i op = context.GetOp<InstLop32i>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, op.Imm32);
EmitLop(context, op.LogicOp, PredicateOp.F, srcA, srcB, op.Dest, PT, op.NegA, op.NegB, op.X, op.WriteCC);
}
public static void Lop3R(EmitterContext context)
{
InstLop3R op = context.GetOp<InstLop3R>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitLop3(context, op.Imm, op.PredicateOp, srcA, srcB, srcC, op.Dest, op.DestPred, op.X, op.WriteCC);
}
public static void Lop3I(EmitterContext context)
{
InstLop3I op = context.GetOp<InstLop3I>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
var srcC = GetSrcReg(context, op.SrcC);
EmitLop3(context, op.Imm, PredicateOp.F, srcA, srcB, srcC, op.Dest, PT, false, op.WriteCC);
}
public static void Lop3C(EmitterContext context)
{
InstLop3C op = context.GetOp<InstLop3C>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcC = GetSrcReg(context, op.SrcC);
EmitLop3(context, op.Imm, PredicateOp.F, srcA, srcB, srcC, op.Dest, PT, false, op.WriteCC);
}
private static void EmitLop(
EmitterContext context,
LogicOp logicOp,
PredicateOp predOp,
Operand srcA,
Operand srcB,
int rd,
int destPred,
bool invertA,
bool invertB,
bool extended,
bool writeCC)
{
srcA = context.BitwiseNot(srcA, invertA);
srcB = context.BitwiseNot(srcB, invertB);
Operand res = logicOp switch
{
LogicOp.And => res = context.BitwiseAnd(srcA, srcB),
LogicOp.Or => res = context.BitwiseOr(srcA, srcB),
LogicOp.Xor => res = context.BitwiseExclusiveOr(srcA, srcB),
_ => srcB
};
EmitLopPredWrite(context, res, predOp, destPred);
context.Copy(GetDest(rd), res);
SetZnFlags(context, res, writeCC, extended);
}
private static void EmitLop3(
EmitterContext context,
int truthTable,
PredicateOp predOp,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
int destPred,
bool extended,
bool writeCC)
{
Operand res = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);
EmitLopPredWrite(context, res, predOp, destPred);
context.Copy(GetDest(rd), res);
SetZnFlags(context, res, writeCC, extended);
}
private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
{
if (pred != RegisterConsts.PredicateTrueIndex)
{
Operand pRes;
if (predOp == PredicateOp.F)
{
pRes = Const(IrConsts.False);
}
else if (predOp == PredicateOp.T)
{
pRes = Const(IrConsts.True);
}
else if (predOp == PredicateOp.Z)
{
pRes = context.ICompareEqual(result, Const(0));
}
else /* if (predOp == Pop.Nz) */
{
pRes = context.ICompareNotEqual(result, Const(0));
}
context.Copy(Register(pred, RegisterType.Predicate), pRes);
}
}
}
}

View file

@ -0,0 +1,71 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void ImnmxR(EmitterContext context)
{
InstImnmxR op = context.GetOp<InstImnmxR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitImnmx(context, srcA, srcB, srcPred, op.Dest, op.Signed, op.WriteCC);
}
public static void ImnmxI(EmitterContext context)
{
InstImnmxI op = context.GetOp<InstImnmxI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitImnmx(context, srcA, srcB, srcPred, op.Dest, op.Signed, op.WriteCC);
}
public static void ImnmxC(EmitterContext context)
{
InstImnmxC op = context.GetOp<InstImnmxC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
var srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitImnmx(context, srcA, srcB, srcPred, op.Dest, op.Signed, op.WriteCC);
}
private static void EmitImnmx(
EmitterContext context,
Operand srcA,
Operand srcB,
Operand srcPred,
int rd,
bool isSignedInt,
bool writeCC)
{
Operand resMin = isSignedInt
? context.IMinimumS32(srcA, srcB)
: context.IMinimumU32(srcA, srcB);
Operand resMax = isSignedInt
? context.IMaximumS32(srcA, srcB)
: context.IMaximumU32(srcA, srcB);
Operand res = context.ConditionalSelect(srcPred, resMin, resMax);
context.Copy(GetDest(rd), res);
SetZnFlags(context, res, writeCC);
// TODO: X flags.
}
}
}

View file

@ -0,0 +1,541 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
private enum MemoryRegion
{
Local,
Shared
}
public static void Atom(EmitterContext context)
{
InstAtom op = context.GetOp<InstAtom>();
int sOffset = (op.Imm20 << 12) >> 12;
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);
Operand value = GetSrcReg(context, op.SrcB);
Operand res = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, value);
context.Copy(GetDest(op.Dest), res);
}
public static void Atoms(EmitterContext context)
{
InstAtoms op = context.GetOp<InstAtoms>();
Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));
int sOffset = (op.Imm22 << 10) >> 10;
offset = context.IAdd(offset, Const(sOffset));
Operand value = GetSrcReg(context, op.SrcB);
AtomSize size = op.AtomsSize switch
{
AtomsSize.S32 => AtomSize.S32,
AtomsSize.U64 => AtomSize.U64,
AtomsSize.S64 => AtomSize.S64,
_ => AtomSize.U32
};
Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value);
context.Copy(GetDest(op.Dest), res);
}
public static void Ldc(EmitterContext context)
{
InstLdc op = context.GetOp<InstLdc>();
if (op.LsSize > LsSize2.B64)
{
context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
return;
}
bool isSmallInt = op.LsSize < LsSize2.B32;
int count = op.LsSize == LsSize2.B64 ? 2 : 1;
Operand slot = Const(op.CbufSlot);
Operand srcA = GetSrcReg(context, op.SrcA);
if (op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl)
{
slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
srcA = context.BitwiseAnd(srcA, Const(0xffff));
}
Operand addr = context.IAdd(srcA, Const(Imm16ToSInt(op.CbufOffset)));
Operand wordOffset = context.ShiftRightU32(addr, Const(2));
Operand bitOffset = GetBitOffset(context, addr);
for (int index = 0; index < count; index++)
{
Register dest = new Register(op.Dest + index, RegisterType.Gpr);
if (dest.IsRZ)
{
break;
}
Operand offset = context.IAdd(wordOffset, Const(index));
Operand value = context.LoadConstant(slot, offset);
if (isSmallInt)
{
value = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, value);
}
context.Copy(Register(dest), value);
}
}
public static void Ldg(EmitterContext context)
{
InstLdg op = context.GetOp<InstLdg>();
EmitLdg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
}
public static void Ldl(EmitterContext context)
{
InstLdl op = context.GetOp<InstLdl>();
EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Lds(EmitterContext context)
{
InstLds op = context.GetOp<InstLds>();
EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Red(EmitterContext context)
{
InstRed op = context.GetOp<InstRed>();
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, op.Imm20);
EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, GetDest(op.SrcB));
}
public static void Stg(EmitterContext context)
{
InstStg op = context.GetOp<InstStg>();
EmitStg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
}
public static void Stl(EmitterContext context)
{
InstStl op = context.GetOp<InstStl>();
EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
public static void Sts(EmitterContext context)
{
InstSts op = context.GetOp<InstSts>();
EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
}
private static Operand EmitAtomicOp(
EmitterContext context,
StorageKind storageKind,
AtomOp op,
AtomSize type,
Operand addrLow,
Operand addrHigh,
Operand value)
{
Operand res = Const(0);
switch (op)
{
case AtomOp.Add:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicAdd(storageKind, addrLow, addrHigh, value);
}
else
{
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomOp.And:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicAnd(storageKind, addrLow, addrHigh, value);
}
else
{
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomOp.Xor:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicXor(storageKind, addrLow, addrHigh, value);
}
else
{
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomOp.Or:
if (type == AtomSize.S32 || type == AtomSize.U32)
{
res = context.AtomicOr(storageKind, addrLow, addrHigh, value);
}
else
{
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomOp.Max:
if (type == AtomSize.S32)
{
res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
}
else if (type == AtomSize.U32)
{
res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
}
else
{
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
case AtomOp.Min:
if (type == AtomSize.S32)
{
res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
}
else if (type == AtomSize.U32)
{
res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
}
else
{
context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
}
break;
}
return res;
}
private static void EmitLoad(
EmitterContext context,
MemoryRegion region,
LsSize2 size,
Operand srcA,
int rd,
int offset)
{
if (size > LsSize2.B128)
{
context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
return;
}
bool isSmallInt = size < LsSize2.B32;
int count = 1;
switch (size)
{
case LsSize2.B64: count = 2; break;
case LsSize2.B128: count = 4; break;
}
Operand baseOffset = context.IAdd(srcA, Const(offset));
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
Operand bitOffset = GetBitOffset(context, baseOffset);
for (int index = 0; index < count; index++)
{
Register dest = new Register(rd + index, RegisterType.Gpr);
if (dest.IsRZ)
{
break;
}
Operand elemOffset = context.IAdd(wordOffset, Const(index));
Operand value = null;
switch (region)
{
case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
}
if (isSmallInt)
{
value = ExtractSmallInt(context, (LsSize)size, bitOffset, value);
}
context.Copy(Register(dest), value);
}
}
private static void EmitLdg(
EmitterContext context,
LsSize size,
int ra,
int rd,
int offset,
bool extended)
{
bool isSmallInt = size < LsSize.B32;
int count = GetVectorCount(size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
Operand bitOffset = GetBitOffset(context, addrLow);
for (int index = 0; index < count; index++)
{
Register dest = new Register(rd + index, RegisterType.Gpr);
if (dest.IsRZ)
{
break;
}
Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);
if (isSmallInt)
{
value = ExtractSmallInt(context, size, bitOffset, value);
}
context.Copy(Register(dest), value);
}
}
private static void EmitStore(
EmitterContext context,
MemoryRegion region,
LsSize2 size,
Operand srcA,
int rd,
int offset)
{
if (size > LsSize2.B128)
{
context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
return;
}
bool isSmallInt = size < LsSize2.B32;
int count = 1;
switch (size)
{
case LsSize2.B64: count = 2; break;
case LsSize2.B128: count = 4; break;
}
Operand baseOffset = context.IAdd(srcA, Const(offset));
Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
Operand bitOffset = GetBitOffset(context, baseOffset);
for (int index = 0; index < count; index++)
{
bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand elemOffset = context.IAdd(wordOffset, Const(index));
if (isSmallInt && region == MemoryRegion.Local)
{
Operand word = context.LoadLocal(elemOffset);
value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
}
if (region == MemoryRegion.Local)
{
context.StoreLocal(elemOffset, value);
}
else if (region == MemoryRegion.Shared)
{
switch (size)
{
case LsSize2.U8:
case LsSize2.S8:
context.StoreShared8(baseOffset, value);
break;
case LsSize2.U16:
case LsSize2.S16:
context.StoreShared16(baseOffset, value);
break;
default:
context.StoreShared(elemOffset, value);
break;
}
}
}
}
private static void EmitStg(
EmitterContext context,
LsSize2 size,
int ra,
int rd,
int offset,
bool extended)
{
if (size > LsSize2.B128)
{
context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
return;
}
int count = GetVectorCount((LsSize)size);
(Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
Operand bitOffset = GetBitOffset(context, addrLow);
for (int index = 0; index < count; index++)
{
bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
if (size == LsSize2.U8 || size == LsSize2.S8)
{
context.StoreGlobal8(addrLowOffset, addrHigh, value);
}
else if (size == LsSize2.U16 || size == LsSize2.S16)
{
context.StoreGlobal16(addrLowOffset, addrHigh, value);
}
else
{
context.StoreGlobal(addrLowOffset, addrHigh, value);
}
}
}
private static int GetVectorCount(LsSize size)
{
switch (size)
{
case LsSize.B64:
return 2;
case LsSize.B128:
case LsSize.UB128:
return 4;
}
return 1;
}
private static (Operand, Operand) Get40BitsAddress(
EmitterContext context,
Register ra,
bool extended,
int offset)
{
Operand addrLow = Register(ra);
Operand addrHigh;
if (extended && !ra.IsRZ)
{
addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
}
else
{
addrHigh = Const(0);
}
Operand offs = Const(offset);
addrLow = context.IAdd(addrLow, offs);
if (extended)
{
Operand carry = context.ICompareLessUnsigned(addrLow, offs);
addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
}
return (addrLow, addrHigh);
}
private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
{
// Note: bit offset = (baseOffset & 0b11) * 8.
// Addresses should be always aligned to the integer type,
// so we don't need to take unaligned addresses into account.
return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
}
private static Operand ExtractSmallInt(
EmitterContext context,
LsSize size,
Operand bitOffset,
Operand value)
{
value = context.ShiftRightU32(value, bitOffset);
switch (size)
{
case LsSize.U8: value = ZeroExtendTo32(context, value, 8); break;
case LsSize.U16: value = ZeroExtendTo32(context, value, 16); break;
case LsSize.S8: value = SignExtendTo32(context, value, 8); break;
case LsSize.S16: value = SignExtendTo32(context, value, 16); break;
}
return value;
}
private static Operand InsertSmallInt(
EmitterContext context,
LsSize size,
Operand bitOffset,
Operand word,
Operand value)
{
switch (size)
{
case LsSize.U8:
case LsSize.S8:
value = context.BitwiseAnd(value, Const(0xff));
value = context.BitfieldInsert(word, value, bitOffset, Const(8));
break;
case LsSize.U16:
case LsSize.S16:
value = context.BitwiseAnd(value, Const(0xffff));
value = context.BitfieldInsert(word, value, bitOffset, Const(16));
break;
}
return value;
}
}
}

View file

@ -0,0 +1,237 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void MovR(EmitterContext context)
{
InstMovR op = context.GetOp<InstMovR>();
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
}
public static void MovI(EmitterContext context)
{
InstMovI op = context.GetOp<InstMovI>();
context.Copy(GetDest(op.Dest), GetSrcImm(context, op.Imm20));
}
public static void MovC(EmitterContext context)
{
InstMovC op = context.GetOp<InstMovC>();
context.Copy(GetDest(op.Dest), GetSrcCbuf(context, op.CbufSlot, op.CbufOffset));
}
public static void Mov32i(EmitterContext context)
{
InstMov32i op = context.GetOp<InstMov32i>();
context.Copy(GetDest(op.Dest), GetSrcImm(context, op.Imm32));
}
public static void R2pR(EmitterContext context)
{
InstR2pR op = context.GetOp<InstR2pR>();
Operand value = GetSrcReg(context, op.SrcA);
Operand mask = GetSrcReg(context, op.SrcB);
EmitR2p(context, value, mask, op.ByteSel, op.Ccpr);
}
public static void R2pI(EmitterContext context)
{
InstR2pI op = context.GetOp<InstR2pI>();
Operand value = GetSrcReg(context, op.SrcA);
Operand mask = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitR2p(context, value, mask, op.ByteSel, op.Ccpr);
}
public static void R2pC(EmitterContext context)
{
InstR2pC op = context.GetOp<InstR2pC>();
Operand value = GetSrcReg(context, op.SrcA);
Operand mask = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitR2p(context, value, mask, op.ByteSel, op.Ccpr);
}
public static void S2r(EmitterContext context)
{
InstS2r op = context.GetOp<InstS2r>();
Operand src;
switch (op.SReg)
{
case SReg.LaneId:
src = context.Load(StorageKind.Input, IoVariable.SubgroupLaneId);
break;
case SReg.InvocationId:
src = context.Load(StorageKind.Input, IoVariable.InvocationId);
break;
case SReg.YDirection:
src = ConstF(1); // TODO: Use value from Y direction GPU register.
break;
case SReg.ThreadKill:
src = context.Config.Stage == ShaderStage.Fragment ? context.Load(StorageKind.Input, IoVariable.ThreadKill) : Const(0);
break;
case SReg.InvocationInfo:
if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment)
{
// Note: Lowest 8-bits seems to contain some primitive index,
// but it seems to be NVIDIA implementation specific as it's only used
// to calculate ISBE offsets, so we can just keep it as zero.
if (context.Config.Stage == ShaderStage.TessellationControl ||
context.Config.Stage == ShaderStage.TessellationEvaluation)
{
src = context.ShiftLeft(context.Load(StorageKind.Input, IoVariable.PatchVertices), Const(16));
}
else
{
src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
}
}
else
{
src = Const(0);
}
break;
case SReg.TId:
Operand tidX = context.Load(StorageKind.Input, IoVariable.ThreadId, null, Const(0));
Operand tidY = context.Load(StorageKind.Input, IoVariable.ThreadId, null, Const(1));
Operand tidZ = context.Load(StorageKind.Input, IoVariable.ThreadId, null, Const(2));
tidY = context.ShiftLeft(tidY, Const(16));
tidZ = context.ShiftLeft(tidZ, Const(26));
src = context.BitwiseOr(tidX, context.BitwiseOr(tidY, tidZ));
break;
case SReg.TIdX:
src = context.Load(StorageKind.Input, IoVariable.ThreadId, null, Const(0));
break;
case SReg.TIdY:
src = context.Load(StorageKind.Input, IoVariable.ThreadId, null, Const(1));
break;
case SReg.TIdZ:
src = context.Load(StorageKind.Input, IoVariable.ThreadId, null, Const(2));
break;
case SReg.CtaIdX:
src = context.Load(StorageKind.Input, IoVariable.CtaId, null, Const(0));
break;
case SReg.CtaIdY:
src = context.Load(StorageKind.Input, IoVariable.CtaId, null, Const(1));
break;
case SReg.CtaIdZ:
src = context.Load(StorageKind.Input, IoVariable.CtaId, null, Const(2));
break;
case SReg.EqMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupEqMask, null, Const(0));
break;
case SReg.LtMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupLtMask, null, Const(0));
break;
case SReg.LeMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupLeMask, null, Const(0));
break;
case SReg.GtMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupGtMask, null, Const(0));
break;
case SReg.GeMask:
src = context.Load(StorageKind.Input, IoVariable.SubgroupGeMask, null, Const(0));
break;
default:
src = Const(0);
break;
}
context.Copy(GetDest(op.Dest), src);
}
public static void SelR(EmitterContext context)
{
InstSelR op = context.GetOp<InstSelR>();
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcReg(context, op.SrcB);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitSel(context, srcA, srcB, srcPred, op.Dest);
}
public static void SelI(EmitterContext context)
{
InstSelI op = context.GetOp<InstSelI>();
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitSel(context, srcA, srcB, srcPred, op.Dest);
}
public static void SelC(EmitterContext context)
{
InstSelC op = context.GetOp<InstSelC>();
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
EmitSel(context, srcA, srcB, srcPred, op.Dest);
}
private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
{
Operand Test(Operand value, int bit)
{
return context.ICompareNotEqual(context.BitwiseAnd(value, Const(1 << bit)), Const(0));
}
if (ccpr)
{
// TODO: Support Register to condition code flags copy.
context.Config.GpuAccessor.Log("R2P.CC not implemented.");
}
else
{
int shift = (int)byteSel * 8;
for (int bit = 0; bit < RegisterConsts.PredsCount; bit++)
{
Operand pred = Register(bit, RegisterType.Predicate);
Operand res = context.ConditionalSelect(Test(mask, bit), Test(value, bit + shift), pred);
context.Copy(pred, res);
}
}
}
private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
{
Operand res = context.ConditionalSelect(srcPred, srcA, srcB);
context.Copy(GetDest(rd), res);
}
}
}

View file

@ -0,0 +1,97 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void RroR(EmitterContext context)
{
InstRroR op = context.GetOp<InstRroR>();
EmitRro(context, GetSrcReg(context, op.SrcB), op.Dest, op.AbsB, op.NegB);
}
public static void RroI(EmitterContext context)
{
InstRroI op = context.GetOp<InstRroI>();
EmitRro(context, GetSrcImm(context, Imm20ToFloat(op.Imm20)), op.Dest, op.AbsB, op.NegB);
}
public static void RroC(EmitterContext context)
{
InstRroC op = context.GetOp<InstRroC>();
EmitRro(context, GetSrcCbuf(context, op.CbufSlot, op.CbufOffset), op.Dest, op.AbsB, op.NegB);
}
public static void Mufu(EmitterContext context)
{
InstMufu op = context.GetOp<InstMufu>();
Operand res = context.FPAbsNeg(GetSrcReg(context, op.SrcA), op.AbsA, op.NegA);
switch (op.MufuOp)
{
case MufuOp.Cos:
res = context.FPCosine(res);
break;
case MufuOp.Sin:
res = context.FPSine(res);
break;
case MufuOp.Ex2:
res = context.FPExponentB2(res);
break;
case MufuOp.Lg2:
res = context.FPLogarithmB2(res);
break;
case MufuOp.Rcp:
res = context.FPReciprocal(res);
break;
case MufuOp.Rsq:
res = context.FPReciprocalSquareRoot(res);
break;
case MufuOp.Rcp64h:
res = context.PackDouble2x32(OperandHelper.Const(0), res);
res = context.UnpackDouble2x32High(context.FPReciprocal(res, Instruction.FP64));
break;
case MufuOp.Rsq64h:
res = context.PackDouble2x32(OperandHelper.Const(0), res);
res = context.UnpackDouble2x32High(context.FPReciprocalSquareRoot(res, Instruction.FP64));
break;
case MufuOp.Sqrt:
res = context.FPSquareRoot(res);
break;
default:
context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{op.MufuOp}\".");
break;
}
context.Copy(GetDest(op.Dest), context.FPSaturate(res, op.Sat));
}
private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
{
// This is the range reduction operator,
// we translate it as a simple move, as it
// should be always followed by a matching
// MUFU instruction.
srcB = context.FPAbsNeg(srcB, absB, negB);
context.Copy(GetDest(rd), srcB);
}
}
}

View file

@ -0,0 +1,15 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Nop(EmitterContext context)
{
InstNop op = context.GetOp<InstNop>();
// No operation.
}
}
}

View file

@ -0,0 +1,54 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Pset(EmitterContext context)
{
InstPset op = context.GetOp<InstPset>();
Operand srcA = context.BitwiseNot(Register(op.Src2Pred, RegisterType.Predicate), op.Src2PredInv);
Operand srcB = context.BitwiseNot(Register(op.Src1Pred, RegisterType.Predicate), op.Src1PredInv);
Operand srcC = context.BitwiseNot(Register(op.SrcPred, RegisterType.Predicate), op.SrcPredInv);
Operand res = GetPredLogicalOp(context, op.BoolOpAB, srcA, srcB);
res = GetPredLogicalOp(context, op.BoolOpC, res, srcC);
Operand dest = GetDest(op.Dest);
if (op.BVal)
{
context.Copy(dest, context.ConditionalSelect(res, ConstF(1), Const(0)));
}
else
{
context.Copy(dest, res);
}
}
public static void Psetp(EmitterContext context)
{
InstPsetp op = context.GetOp<InstPsetp>();
Operand srcA = context.BitwiseNot(Register(op.Src2Pred, RegisterType.Predicate), op.Src2PredInv);
Operand srcB = context.BitwiseNot(Register(op.Src1Pred, RegisterType.Predicate), op.Src1PredInv);
Operand p0Res = GetPredLogicalOp(context, op.BoolOpAB, srcA, srcB);
Operand p1Res = context.BitwiseNot(p0Res);
Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
p0Res = GetPredLogicalOp(context, op.BoolOpC, p0Res, srcPred);
p1Res = GetPredLogicalOp(context, op.BoolOpC, p1Res, srcPred);
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
}
}
}

View file

@ -0,0 +1,249 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void ShfLR(EmitterContext context)
{
InstShfLR op = context.GetOp<InstShfLR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: true, op.WriteCC);
}
public static void ShfRR(EmitterContext context)
{
InstShfRR op = context.GetOp<InstShfRR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
var srcC = GetSrcReg(context, op.SrcC);
EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: false, op.WriteCC);
}
public static void ShfLI(EmitterContext context)
{
InstShfLI op = context.GetOp<InstShfLI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = Const(op.Imm6);
var srcC = GetSrcReg(context, op.SrcC);
EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: true, op.WriteCC);
}
public static void ShfRI(EmitterContext context)
{
InstShfRI op = context.GetOp<InstShfRI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = Const(op.Imm6);
var srcC = GetSrcReg(context, op.SrcC);
EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: false, op.WriteCC);
}
public static void ShlR(EmitterContext context)
{
InstShlR op = context.GetOp<InstShlR>();
EmitShl(context, GetSrcReg(context, op.SrcA), GetSrcReg(context, op.SrcB), op.Dest, op.M);
}
public static void ShlI(EmitterContext context)
{
InstShlI op = context.GetOp<InstShlI>();
EmitShl(context, GetSrcReg(context, op.SrcA), GetSrcImm(context, Imm20ToSInt(op.Imm20)), op.Dest, op.M);
}
public static void ShlC(EmitterContext context)
{
InstShlC op = context.GetOp<InstShlC>();
EmitShl(context, GetSrcReg(context, op.SrcA), GetSrcCbuf(context, op.CbufSlot, op.CbufOffset), op.Dest, op.M);
}
public static void ShrR(EmitterContext context)
{
InstShrR op = context.GetOp<InstShrR>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcReg(context, op.SrcB);
EmitShr(context, srcA, srcB, op.Dest, op.M, op.Brev, op.Signed);
}
public static void ShrI(EmitterContext context)
{
InstShrI op = context.GetOp<InstShrI>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitShr(context, srcA, srcB, op.Dest, op.M, op.Brev, op.Signed);
}
public static void ShrC(EmitterContext context)
{
InstShrC op = context.GetOp<InstShrC>();
var srcA = GetSrcReg(context, op.SrcA);
var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitShr(context, srcA, srcB, op.Dest, op.M, op.Brev, op.Signed);
}
private static void EmitShf(
EmitterContext context,
MaxShift maxShift,
Operand srcA,
Operand srcB,
Operand srcC,
int rd,
bool mask,
bool left,
bool writeCC)
{
bool isLongShift = maxShift == MaxShift.U64 || maxShift == MaxShift.S64;
bool signedShift = maxShift == MaxShift.S64;
int maxShiftConst = isLongShift ? 64 : 32;
if (mask)
{
srcB = context.BitwiseAnd(srcB, Const(maxShiftConst - 1));
}
Operand res;
if (left)
{
// res = (C << B) | (A >> (32 - B))
res = context.ShiftLeft(srcC, srcB);
res = context.BitwiseOr(res, context.ShiftRightU32(srcA, context.ISubtract(Const(32), srcB)));
if (isLongShift)
{
// res = B >= 32 ? A << (B - 32) : res
Operand lowerShift = context.ShiftLeft(srcA, context.ISubtract(srcB, Const(32)));
Operand shiftGreaterThan31 = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));
res = context.ConditionalSelect(shiftGreaterThan31, lowerShift, res);
}
}
else
{
// res = (A >> B) | (C << (32 - B))
res = context.ShiftRightU32(srcA, srcB);
res = context.BitwiseOr(res, context.ShiftLeft(srcC, context.ISubtract(Const(32), srcB)));
if (isLongShift)
{
// res = B >= 32 ? C >> (B - 32) : res
Operand upperShift = signedShift
? context.ShiftRightS32(srcC, context.ISubtract(srcB, Const(32)))
: context.ShiftRightU32(srcC, context.ISubtract(srcB, Const(32)));
Operand shiftGreaterThan31 = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));
res = context.ConditionalSelect(shiftGreaterThan31, upperShift, res);
}
}
if (!mask)
{
// Clamped shift value.
Operand isLessThanMax = context.ICompareLessUnsigned(srcB, Const(maxShiftConst));
res = context.ConditionalSelect(isLessThanMax, res, Const(0));
}
context.Copy(GetDest(rd), res);
if (writeCC)
{
InstEmitAluHelper.SetZnFlags(context, res, writeCC);
}
// TODO: X.
}
private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
{
if (mask)
{
srcB = context.BitwiseAnd(srcB, Const(0x1f));
}
Operand res = context.ShiftLeft(srcA, srcB);
if (!mask)
{
// Clamped shift value.
Operand isLessThan32 = context.ICompareLessUnsigned(srcB, Const(32));
res = context.ConditionalSelect(isLessThan32, res, Const(0));
}
// TODO: X, CC.
context.Copy(GetDest(rd), res);
}
private static void EmitShr(
EmitterContext context,
Operand srcA,
Operand srcB,
int rd,
bool mask,
bool bitReverse,
bool isSigned)
{
if (bitReverse)
{
srcA = context.BitfieldReverse(srcA);
}
if (mask)
{
srcB = context.BitwiseAnd(srcB, Const(0x1f));
}
Operand res = isSigned
? context.ShiftRightS32(srcA, srcB)
: context.ShiftRightU32(srcA, srcB);
if (!mask)
{
// Clamped shift value.
Operand resShiftBy32;
if (isSigned)
{
resShiftBy32 = context.ShiftRightS32(srcA, Const(31));
}
else
{
resShiftBy32 = Const(0);
}
Operand isLessThan32 = context.ICompareLessUnsigned(srcB, Const(32));
res = context.ConditionalSelect(isLessThan32, res, resShiftBy32);
}
// TODO: X, CC.
context.Copy(GetDest(rd), res);
}
}
}

View file

@ -0,0 +1,796 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Numerics;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void SuatomB(EmitterContext context)
{
InstSuatomB op = context.GetOp<InstSuatomB>();
EmitSuatom(
context,
op.Dim,
op.Op,
op.Size,
0,
op.SrcA,
op.SrcB,
op.SrcC,
op.Dest,
op.Ba,
isBindless: true,
compareAndSwap: false);
}
public static void Suatom(EmitterContext context)
{
InstSuatom op = context.GetOp<InstSuatom>();
EmitSuatom(
context,
op.Dim,
op.Op,
op.Size,
op.TidB,
op.SrcA,
op.SrcB,
0,
op.Dest,
op.Ba,
isBindless: false,
compareAndSwap: false);
}
public static void SuatomB2(EmitterContext context)
{
InstSuatomB2 op = context.GetOp<InstSuatomB2>();
EmitSuatom(
context,
op.Dim,
op.Op,
op.Size,
0,
op.SrcA,
op.SrcB,
op.SrcC,
op.Dest,
op.Ba,
isBindless: true,
compareAndSwap: false);
}
public static void SuatomCasB(EmitterContext context)
{
InstSuatomCasB op = context.GetOp<InstSuatomCasB>();
EmitSuatom(
context,
op.Dim,
0,
op.Size,
0,
op.SrcA,
op.SrcB,
op.SrcC,
op.Dest,
op.Ba,
isBindless: true,
compareAndSwap: true);
}
public static void SuatomCas(EmitterContext context)
{
InstSuatomCas op = context.GetOp<InstSuatomCas>();
EmitSuatom(
context,
op.Dim,
0,
op.Size,
op.TidB,
op.SrcA,
op.SrcB,
0,
op.Dest,
op.Ba,
isBindless: false,
compareAndSwap: true);
}
public static void SuldDB(EmitterContext context)
{
InstSuldDB op = context.GetOp<InstSuldDB>();
EmitSuld(context, op.CacheOp, op.Dim, op.Size, 0, 0, op.SrcA, op.Dest, op.SrcC, useComponents: false, op.Ba, isBindless: true);
}
public static void SuldD(EmitterContext context)
{
InstSuldD op = context.GetOp<InstSuldD>();
EmitSuld(context, op.CacheOp, op.Dim, op.Size, op.TidB, 0, op.SrcA, op.Dest, 0, useComponents: false, op.Ba, isBindless: false);
}
public static void SuldB(EmitterContext context)
{
InstSuldB op = context.GetOp<InstSuldB>();
EmitSuld(context, op.CacheOp, op.Dim, 0, 0, op.Rgba, op.SrcA, op.Dest, op.SrcC, useComponents: true, false, isBindless: true);
}
public static void Suld(EmitterContext context)
{
InstSuld op = context.GetOp<InstSuld>();
EmitSuld(context, op.CacheOp, op.Dim, 0, op.TidB, op.Rgba, op.SrcA, op.Dest, 0, useComponents: true, false, isBindless: false);
}
public static void SuredB(EmitterContext context)
{
InstSuredB op = context.GetOp<InstSuredB>();
EmitSured(context, op.Dim, op.Op, op.Size, 0, op.SrcA, op.Dest, op.SrcC, op.Ba, isBindless: true);
}
public static void Sured(EmitterContext context)
{
InstSured op = context.GetOp<InstSured>();
EmitSured(context, op.Dim, op.Op, op.Size, op.TidB, op.SrcA, op.Dest, 0, op.Ba, isBindless: false);
}
public static void SustDB(EmitterContext context)
{
InstSustDB op = context.GetOp<InstSustDB>();
EmitSust(context, op.CacheOp, op.Dim, op.Size, 0, 0, op.SrcA, op.Dest, op.SrcC, useComponents: false, op.Ba, isBindless: true);
}
public static void SustD(EmitterContext context)
{
InstSustD op = context.GetOp<InstSustD>();
EmitSust(context, op.CacheOp, op.Dim, op.Size, op.TidB, 0, op.SrcA, op.Dest, 0, useComponents: false, op.Ba, isBindless: false);
}
public static void SustB(EmitterContext context)
{
InstSustB op = context.GetOp<InstSustB>();
EmitSust(context, op.CacheOp, op.Dim, 0, 0, op.Rgba, op.SrcA, op.Dest, op.SrcC, useComponents: true, false, isBindless: true);
}
public static void Sust(EmitterContext context)
{
InstSust op = context.GetOp<InstSust>();
EmitSust(context, op.CacheOp, op.Dim, 0, op.TidB, op.Rgba, op.SrcA, op.Dest, 0, useComponents: true, false, isBindless: false);
}
private static void EmitSuatom(
EmitterContext context,
SuDim dimensions,
SuatomOp atomicOp,
SuatomSize size,
int imm,
int srcA,
int srcB,
int srcC,
int dest,
bool byteAddress,
bool isBindless,
bool compareAndSwap)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");
return;
}
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
Operand destOperand = dest != RegisterConsts.RegisterZeroIndex ? Register(dest, RegisterType.Gpr) : null;
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
sourcesList.Add(Ra());
type |= SamplerType.Array;
}
if (byteAddress)
{
int xIndex = isBindless ? 1 : 0;
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// TODO: FP and 64-bit formats.
TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
: GetTextureFormat(size);
if (compareAndSwap)
{
sourcesList.Add(Rb());
}
sourcesList.Add(Rb());
Operand[] sources = sourcesList.ToArray();
TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp);
if (isBindless)
{
flags |= TextureFlags.Bindless;
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageAtomic,
type,
format,
flags,
imm,
0,
new[] { destOperand },
sources);
context.Add(operation);
}
private static void EmitSuld(
EmitterContext context,
CacheOpLd cacheOp,
SuDim dimensions,
SuSize size,
int imm,
SuRgba componentMask,
int srcA,
int srcB,
int srcC,
bool useComponents,
bool byteAddress,
bool isBindless)
{
context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image store sampler type.");
return;
}
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
sourcesList.Add(Ra());
}
Operand[] sources = sourcesList.ToArray();
int handle = imm;
TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
if (cacheOp == CacheOpLd.Cg)
{
flags |= TextureFlags.Coherent;
}
if (useComponents)
{
Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];
int outputIndex = 0;
for (int i = 0; i < dests.Length; i++)
{
if (srcB + i >= RegisterConsts.RegisterZeroIndex)
{
break;
}
dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr);
}
if (outputIndex != dests.Length)
{
Array.Resize(ref dests, outputIndex);
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageLoad,
type,
flags,
handle,
(int)componentMask,
dests,
sources);
if (!isBindless)
{
operation.Format = context.Config.GetTextureFormat(handle);
}
context.Add(operation);
}
else
{
if (byteAddress)
{
int xIndex = isBindless ? 1 : 0;
sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
int components = GetComponents(size);
int compMask = (1 << components) - 1;
Operand[] dests = new Operand[components];
int outputIndex = 0;
for (int i = 0; i < dests.Length; i++)
{
if (srcB + i >= RegisterConsts.RegisterZeroIndex)
{
break;
}
dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr);
}
if (outputIndex != dests.Length)
{
Array.Resize(ref dests, outputIndex);
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageLoad,
type,
GetTextureFormat(size),
flags,
handle,
compMask,
dests,
sources);
context.Add(operation);
switch (size)
{
case SuSize.U8: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 8)); break;
case SuSize.U16: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 16)); break;
case SuSize.S8: context.Copy(dests[0], SignExtendTo32(context, dests[0], 8)); break;
case SuSize.S16: context.Copy(dests[0], SignExtendTo32(context, dests[0], 16)); break;
}
}
}
private static void EmitSured(
EmitterContext context,
SuDim dimensions,
RedOp atomicOp,
SuatomSize size,
int imm,
int srcA,
int srcB,
int srcC,
bool byteAddress,
bool isBindless)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");
return;
}
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
sourcesList.Add(Ra());
type |= SamplerType.Array;
}
if (byteAddress)
{
int xIndex = isBindless ? 1 : 0;
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
// TODO: FP and 64-bit formats.
TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
: GetTextureFormat(size);
sourcesList.Add(Rb());
Operand[] sources = sourcesList.ToArray();
TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp);
if (isBindless)
{
flags |= TextureFlags.Bindless;
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageAtomic,
type,
format,
flags,
imm,
0,
null,
sources);
context.Add(operation);
}
private static void EmitSust(
EmitterContext context,
CacheOpSt cacheOp,
SuDim dimensions,
SuSize size,
int imm,
SuRgba componentMask,
int srcA,
int srcB,
int srcC,
bool useComponents,
bool byteAddress,
bool isBindless)
{
SamplerType type = ConvertSamplerType(dimensions);
if (type == SamplerType.None)
{
context.Config.GpuAccessor.Log("Invalid image store sampler type.");
return;
}
Operand Ra()
{
if (srcA > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcA++, RegisterType.Gpr));
}
Operand Rb()
{
if (srcB > RegisterConsts.RegisterZeroIndex)
{
return Const(0);
}
return context.Copy(Register(srcB++, RegisterType.Gpr));
}
List<Operand> sourcesList = new List<Operand>();
if (isBindless)
{
sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
}
int coordsCount = type.GetDimensions();
for (int index = 0; index < coordsCount; index++)
{
sourcesList.Add(Ra());
}
if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
{
sourcesList.Add(Const(0));
type &= ~SamplerType.Mask;
type |= SamplerType.Texture2D;
}
if (type.HasFlag(SamplerType.Array))
{
sourcesList.Add(Ra());
}
TextureFormat format = TextureFormat.Unknown;
if (useComponents)
{
for (int compMask = (int)componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
{
if ((compMask & 1) != 0)
{
sourcesList.Add(Rb());
}
}
if (!isBindless)
{
format = context.Config.GetTextureFormat(imm);
}
}
else
{
if (byteAddress)
{
int xIndex = isBindless ? 1 : 0;
sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
}
int components = GetComponents(size);
for (int compIndex = 0; compIndex < components; compIndex++)
{
sourcesList.Add(Rb());
}
format = GetTextureFormat(size);
}
Operand[] sources = sourcesList.ToArray();
int handle = imm;
TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
if (cacheOp == CacheOpSt.Cg)
{
flags |= TextureFlags.Coherent;
}
TextureOperation operation = context.CreateTextureOperation(
Instruction.ImageStore,
type,
format,
flags,
handle,
0,
null,
sources);
context.Add(operation);
}
private static int GetComponentSizeInBytesLog2(SuatomSize size)
{
return size switch
{
SuatomSize.U32 => 2,
SuatomSize.S32 => 2,
SuatomSize.U64 => 3,
SuatomSize.F32FtzRn => 2,
SuatomSize.F16x2FtzRn => 2,
SuatomSize.S64 => 3,
SuatomSize.Sd32 => 2,
SuatomSize.Sd64 => 3,
_ => 2
};
}
private static TextureFormat GetTextureFormat(SuatomSize size)
{
return size switch
{
SuatomSize.U32 => TextureFormat.R32Uint,
SuatomSize.S32 => TextureFormat.R32Sint,
SuatomSize.U64 => TextureFormat.R32G32Uint,
SuatomSize.F32FtzRn => TextureFormat.R32Float,
SuatomSize.F16x2FtzRn => TextureFormat.R16G16Float,
SuatomSize.S64 => TextureFormat.R32G32Uint,
SuatomSize.Sd32 => TextureFormat.R32Uint,
SuatomSize.Sd64 => TextureFormat.R32G32Uint,
_ => TextureFormat.R32Uint
};
}
private static TextureFlags GetAtomicOpFlags(SuatomOp op)
{
return op switch
{
SuatomOp.Add => TextureFlags.Add,
SuatomOp.Min => TextureFlags.Minimum,
SuatomOp.Max => TextureFlags.Maximum,
SuatomOp.Inc => TextureFlags.Increment,
SuatomOp.Dec => TextureFlags.Decrement,
SuatomOp.And => TextureFlags.BitwiseAnd,
SuatomOp.Or => TextureFlags.BitwiseOr,
SuatomOp.Xor => TextureFlags.BitwiseXor,
SuatomOp.Exch => TextureFlags.Swap,
_ => TextureFlags.Add
};
}
private static int GetComponents(SuSize size)
{
return size switch
{
SuSize.B64 => 2,
SuSize.B128 => 4,
SuSize.UB128 => 4,
_ => 1
};
}
private static int GetComponentSizeInBytesLog2(SuSize size)
{
return size switch
{
SuSize.U8 => 0,
SuSize.S8 => 0,
SuSize.U16 => 1,
SuSize.S16 => 1,
SuSize.B32 => 2,
SuSize.B64 => 3,
SuSize.B128 => 4,
SuSize.UB128 => 4,
_ => 2
};
}
private static TextureFormat GetTextureFormat(SuSize size)
{
return size switch
{
SuSize.U8 => TextureFormat.R8Uint,
SuSize.S8 => TextureFormat.R8Sint,
SuSize.U16 => TextureFormat.R16Uint,
SuSize.S16 => TextureFormat.R16Sint,
SuSize.B32 => TextureFormat.R32Uint,
SuSize.B64 => TextureFormat.R32G32Uint,
SuSize.B128 => TextureFormat.R32G32B32A32Uint,
SuSize.UB128 => TextureFormat.R32G32B32A32Uint,
_ => TextureFormat.R32Uint
};
}
private static SamplerType ConvertSamplerType(SuDim target)
{
return target switch
{
SuDim._1d => SamplerType.Texture1D,
SuDim._1dBuffer => SamplerType.TextureBuffer,
SuDim._1dArray => SamplerType.Texture1D | SamplerType.Array,
SuDim._2d => SamplerType.Texture2D,
SuDim._2dArray => SamplerType.Texture2D | SamplerType.Array,
SuDim._3d => SamplerType.Texture3D,
_ => SamplerType.None
};
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,118 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Vmad(EmitterContext context)
{
InstVmad op = context.GetOp<InstVmad>();
bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if (bSigned)
{
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand productLow = context.IMultiply(srcA, srcB);
Operand productHigh;
if (aSigned == bSigned)
{
productHigh = aSigned
? context.MultiplyHighS32(srcA, srcB)
: context.MultiplyHighU32(srcA, srcB);
}
else
{
Operand temp = aSigned
? context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))
: context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)));
productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB));
}
if (op.AvgMode == AvgMode.NegA)
{
(productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh);
}
Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry);
Operand resHigh = context.IAdd(productHigh, sumCarry);
if (op.AvgMode == AvgMode.PlusOne)
{
resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry);
resHigh = context.IAdd(resHigh, poCarry);
}
bool resSigned = op.ASelect == VectorSelect.S32 ||
op.BSelect == VectorSelect.S32 ||
op.AvgMode == AvgMode.NegB ||
op.AvgMode == AvgMode.NegA;
int shift = op.VideoScale switch
{
VideoScale.Shr7 => 7,
VideoScale.Shr15 => 15,
_ => 0
};
if (shift != 0)
{
// Low = (Low >> Shift) | (High << (32 - Shift))
// High >>= Shift
resLow = context.ShiftRightU32(resLow, Const(shift));
resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift)));
resHigh = resSigned
? context.ShiftRightS32(resHigh, Const(shift))
: context.ShiftRightU32(resHigh, Const(shift));
}
Operand res = resLow;
if (op.Sat)
{
Operand sign = context.ShiftRightS32(resHigh, Const(31));
if (resSigned)
{
Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31)));
Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue));
res = context.ConditionalSelect(overflow, clampValue, resLow);
}
else
{
Operand overflow = context.ICompareNotEqual(resHigh, Const(0));
res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow);
}
}
context.Copy(GetDest(op.Dest), res);
// TODO: CC.
}
}
}

View file

@ -0,0 +1,183 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Vmnmx(EmitterContext context)
{
InstVmnmx op = context.GetOp<InstVmnmx>();
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcC = GetSrcReg(context, op.SrcC);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if ((op.BSelect & VectorSelect.S8B0) != 0)
{
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand res;
bool resSigned;
if ((op.ASelect & VectorSelect.S8B0) != (op.BSelect & VectorSelect.S8B0))
{
// Signedness is different, but for max, result will always fit a U32,
// since one of the inputs can't be negative, and the result is the one
// with highest value. For min, it will always fit on a S32, since
// one of the input can't be greater than INT_MAX and we want the lowest value.
resSigned = !op.Mn;
res = op.Mn ? context.IMaximumU32(srcA, srcB) : context.IMinimumS32(srcA, srcB);
if ((op.ASelect & VectorSelect.S8B0) != 0)
{
Operand isBGtIntMax = context.ICompareLess(srcB, Const(0));
res = context.ConditionalSelect(isBGtIntMax, srcB, res);
}
else
{
Operand isAGtIntMax = context.ICompareLess(srcA, Const(0));
res = context.ConditionalSelect(isAGtIntMax, srcA, res);
}
}
else
{
// Ra and Rb have the same signedness, so doesn't matter which one we test.
resSigned = (op.ASelect & VectorSelect.S8B0) != 0;
if (op.Mn)
{
res = resSigned
? context.IMaximumS32(srcA, srcB)
: context.IMaximumU32(srcA, srcB);
}
else
{
res = resSigned
? context.IMinimumS32(srcA, srcB)
: context.IMinimumU32(srcA, srcB);
}
}
if (op.Sat)
{
if (op.DFormat && !resSigned)
{
res = context.IMinimumU32(res, Const(int.MaxValue));
}
else if (!op.DFormat && resSigned)
{
res = context.IMaximumS32(res, Const(0));
}
}
switch (op.VideoOp)
{
case VideoOp.Acc:
res = context.IAdd(res, srcC);
break;
case VideoOp.Max:
res = op.DFormat ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
break;
case VideoOp.Min:
res = op.DFormat ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
break;
case VideoOp.Mrg16h:
res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
break;
case VideoOp.Mrg16l:
res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
break;
case VideoOp.Mrg8b0:
res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
break;
case VideoOp.Mrg8b2:
res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
break;
}
context.Copy(GetDest(op.Dest), res);
}
public static void Vsetp(EmitterContext context)
{
InstVsetp op = context.GetOp<InstVsetp>();
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
Operand srcB;
if (op.BVideo)
{
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
}
else
{
int imm = op.Imm16;
if ((op.BSelect & VectorSelect.S8B0) != 0)
{
imm = (imm << 16) >> 16;
}
srcB = Const(imm);
}
Operand p0Res;
bool signedA = (op.ASelect & VectorSelect.S8B0) != 0;
bool signedB = (op.BSelect & VectorSelect.S8B0) != 0;
if (signedA != signedB)
{
bool a32 = (op.ASelect & ~VectorSelect.S8B0) == VectorSelect.U32;
bool b32 = (op.BSelect & ~VectorSelect.S8B0) == VectorSelect.U32;
if (!a32 && !b32)
{
// Both values are extended small integer and can always fit in a S32, just do a signed comparison.
p0Res = GetIntComparison(context, op.VComp, srcA, srcB, isSigned: true, extended: false);
}
else
{
// TODO: Mismatching sign case.
p0Res = Const(0);
}
}
else
{
// Sign matches, just do a regular comparison.
p0Res = GetIntComparison(context, op.VComp, srcA, srcB, signedA, extended: false);
}
Operand p1Res = context.BitwiseNot(p0Res);
Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
p0Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p0Res, pred);
p1Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p1Res, pred);
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
}
}
}

View file

@ -0,0 +1,84 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
public static void Fswzadd(EmitterContext context)
{
InstFswzadd op = context.GetOp<InstFswzadd>();
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = GetSrcReg(context, op.SrcB);
Operand dest = GetDest(op.Dest);
context.Copy(dest, context.FPSwizzleAdd(srcA, srcB, op.PnWord));
InstEmitAluHelper.SetFPZnFlags(context, dest, op.WriteCC);
}
public static void Shfl(EmitterContext context)
{
InstShfl op = context.GetOp<InstShfl>();
Operand pred = Register(op.DestPred, RegisterType.Predicate);
Operand srcA = GetSrcReg(context, op.SrcA);
Operand srcB = op.BFixShfl ? Const(op.SrcBImm) : GetSrcReg(context, op.SrcB);
Operand srcC = op.CFixShfl ? Const(op.SrcCImm) : GetSrcReg(context, op.SrcC);
(Operand res, Operand valid) = op.ShflMode switch
{
ShflMode.Idx => context.Shuffle(srcA, srcB, srcC),
ShflMode.Up => context.ShuffleUp(srcA, srcB, srcC),
ShflMode.Down => context.ShuffleDown(srcA, srcB, srcC),
ShflMode.Bfly => context.ShuffleXor(srcA, srcB, srcC),
_ => (null, null)
};
context.Copy(GetDest(op.Dest), res);
context.Copy(pred, valid);
}
public static void Vote(EmitterContext context)
{
InstVote op = context.GetOp<InstVote>();
Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
Operand res = null;
switch (op.VoteMode)
{
case VoteMode.All:
res = context.VoteAll(pred);
break;
case VoteMode.Any:
res = context.VoteAny(pred);
break;
case VoteMode.Eq:
res = context.VoteAllEqual(pred);
break;
}
if (res != null)
{
context.Copy(Register(op.VpDest, RegisterType.Predicate), res);
}
else
{
context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
}
if (op.Dest != RegisterConsts.RegisterZeroIndex)
{
context.Copy(GetDest(op.Dest), context.Ballot(pred));
}
}
}
}

View file

@ -0,0 +1,6 @@
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Shader.Instructions
{
delegate void InstEmitter(EmitterContext context);
}

View file

@ -0,0 +1,141 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Instructions
{
static class Lop3Expression
{
private enum TruthTable : byte
{
False = 0x00, // false
True = 0xff, // true
In = 0xf0, // a
And2 = 0xc0, // a & b
Or2 = 0xfc, // a | b
Xor2 = 0x3c, // a ^ b
And3 = 0x80, // a & b & c
Or3 = 0xfe, // a | b | c
XorAnd = 0x60, // a & (b ^ c)
XorOr = 0xf6, // a | (b ^ c)
OrAnd = 0xe0, // a & (b | c)
AndOr = 0xf8, // a | (b & c)
Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true.
Majority = 0xe8, // Popcount(a, b, c) >= 2
Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off
InverseGamble = 0x7e, // Inverse of Gamble
Dot = 0x1a, // a ^ (c | (a & b))
Mux = 0xca, // a ? b : c
AndXor = 0x78, // a ^ (b & c)
OrXor = 0x1e, // a ^ (b | c)
Xor3 = 0x96, // a ^ b ^ c
}
public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
{
for (int i = 0; i < 0x40; i++)
{
TruthTable currImm = (TruthTable)imm;
Operand x = srcA;
Operand y = srcB;
Operand z = srcC;
if ((i & 0x01) != 0)
{
(x, y) = (y, x);
currImm = PermuteTable(currImm, 7, 6, 3, 2, 5, 4, 1, 0);
}
if ((i & 0x02) != 0)
{
(x, z) = (z, x);
currImm = PermuteTable(currImm, 7, 3, 5, 1, 6, 2, 4, 0);
}
if ((i & 0x04) != 0)
{
(y, z) = (z, y);
currImm = PermuteTable(currImm, 7, 5, 6, 4, 3, 1, 2, 0);
}
if ((i & 0x08) != 0)
{
x = context.BitwiseNot(x);
currImm = PermuteTable(currImm, 3, 2, 1, 0, 7, 6, 5, 4);
}
if ((i & 0x10) != 0)
{
y = context.BitwiseNot(y);
currImm = PermuteTable(currImm, 5, 4, 7, 6, 1, 0, 3, 2);
}
if ((i & 0x20) != 0)
{
z = context.BitwiseNot(z);
currImm = PermuteTable(currImm, 6, 7, 4, 5, 2, 3, 0, 1);
}
Operand result = GetExpr(currImm, context, x, y, z);
if (result != null)
{
return result;
}
Operand notResult = GetExpr((TruthTable)((~(int)currImm) & 0xff), context, x, y, z);
if (notResult != null)
{
return context.BitwiseNot(notResult);
}
}
return null;
}
private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z)
{
return imm switch
{
TruthTable.False => Const(0),
TruthTable.True => Const(-1),
TruthTable.In => x,
TruthTable.And2 => context.BitwiseAnd(x, y),
TruthTable.Or2 => context.BitwiseOr(x, y),
TruthTable.Xor2 => context.BitwiseExclusiveOr(x, y),
TruthTable.And3 => context.BitwiseAnd(x, context.BitwiseAnd(y, z)),
TruthTable.Or3 => context.BitwiseOr(x, context.BitwiseOr(y, z)),
TruthTable.XorAnd => context.BitwiseAnd(x, context.BitwiseExclusiveOr(y, z)),
TruthTable.XorOr => context.BitwiseOr(x, context.BitwiseExclusiveOr(y, z)),
TruthTable.OrAnd => context.BitwiseAnd(x, context.BitwiseOr(y, z)),
TruthTable.AndOr => context.BitwiseOr(x, context.BitwiseAnd(y, z)),
TruthTable.Onehot => context.BitwiseExclusiveOr(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))),
TruthTable.Majority => context.BitwiseAnd(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))),
TruthTable.InverseGamble => context.BitwiseOr(context.BitwiseExclusiveOr(x, y), context.BitwiseExclusiveOr(x, z)),
TruthTable.Dot => context.BitwiseAnd(context.BitwiseExclusiveOr(x, z), context.BitwiseOr(context.BitwiseNot(y), z)),
TruthTable.Mux => context.BitwiseOr(context.BitwiseAnd(x, y), context.BitwiseAnd(context.BitwiseNot(x), z)),
TruthTable.AndXor => context.BitwiseExclusiveOr(x, context.BitwiseAnd(y, z)),
TruthTable.OrXor => context.BitwiseExclusiveOr(x, context.BitwiseOr(y, z)),
TruthTable.Xor3 => context.BitwiseExclusiveOr(x, context.BitwiseExclusiveOr(y, z)),
_ => null
};
}
private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0)
{
int result = 0;
result |= (((int)imm >> 0) & 1) << bit0;
result |= (((int)imm >> 1) & 1) << bit1;
result |= (((int)imm >> 2) & 1) << bit2;
result |= (((int)imm >> 3) & 1) << bit3;
result |= (((int)imm >> 4) & 1) << bit4;
result |= (((int)imm >> 5) & 1) << bit5;
result |= (((int)imm >> 6) & 1) << bit6;
result |= (((int)imm >> 7) & 1) << bit7;
return (TruthTable)result;
}
}
}