Implement shader storage buffer operations using new Load/Store instructions (#4993)

* Implement storage buffer operations using new Load/Store instruction

* Extend GenerateMultiTargetStorageOp to also match accesses with a constant offset, and add logging and comments

* Remove now unused code

* Catch more complex cases of global memory usage

* Shader cache version bump

* Extend global access elimination to work with more shared memory cases

* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers

* Tweak preferencing to catch more cases

* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)

* Fix storage buffer bindings

* Simplify some code

* Shader cache version bump

* Fix typo

* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
This commit is contained in:
gdkchan 2023-06-03 20:12:18 -03:00 committed by GitHub
parent 81c9052847
commit 21c9ac6240
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 1468 additions and 1259 deletions

View file

@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
static class Optimizer
{
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
{
RunOptimizationPasses(blocks, config);
int sbUseMask = 0;
int ubeUseMask = 0;
GlobalToStorage.RunPass(hfm, blocks, config);
// These passes look for specific patterns and only need to run once.
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
{
GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
BindlessToIndexed.RunPass(blocks[blkIndex], config);
BindlessElimination.RunPass(blocks[blkIndex], config);
@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
// Run optimizations one last time to remove any code that is now optimizable after above passes.
RunOptimizationPasses(blocks, config);
}

View file

@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
switch (operation.Inst)
{
case Instruction.Add:
case Instruction.BitwiseExclusiveOr:
TryEliminateBinaryOpCommutative(operation, 0);
break;
@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
TryEliminateBitwiseAnd(operation);
break;
case Instruction.BitwiseExclusiveOr:
if (!TryEliminateXorSwap(operation))
{
TryEliminateBinaryOpCommutative(operation, 0);
}
break;
case Instruction.BitwiseOr:
TryEliminateBitwiseOr(operation);
break;
@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
private static void TryEliminateBitwiseAnd(Operation operation)
{
// Try to recognize and optimize those 3 patterns (in order):
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
Operand x = operation.GetSource(0);
Operand y = operation.GetSource(1);
@ -68,11 +75,62 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
}
}
/// <summary>
/// Attempts to recognize a three-instruction XOR swap that ends at the given operation and,
/// when the pattern matches, turns each of the three XOR operations into a copy.
/// </summary>
/// <param name="xCopyOp">Candidate for the last XOR of the swap sequence</param>
/// <returns>True if the pattern was matched and rewritten, false if nothing was modified</returns>
private static bool TryEliminateXorSwap(Operation xCopyOp)
{
    // The classic XOR swap is:
    //  x = x ^ y
    //  y = x ^ y
    //  x = x ^ y
    // which, once in SSA form, looks like:
    //  x2 = x ^ y
    //  y2 = x2 ^ y
    //  x3 = x2 ^ y2
    // It is replaced with the equivalent, far more readable:
    //  temp = y
    //  y = x
    //  x = temp
    // XOR is commutative, so the same swap can be written with the operands in
    // several other orders. To keep things simple only the ordering above is matched.

    Operand swappedX = xCopyOp.GetSource(0); // "x2" in the pattern above.
    Operand swappedY = xCopyOp.GetSource(1); // "y2" in the pattern above.

    // Both inputs of the final XOR must themselves be produced by XOR operations.
    if (swappedX.AsgOp is not Operation tCopyOp || tCopyOp.Inst != Instruction.BitwiseExclusiveOr)
    {
        return false;
    }

    if (swappedY.AsgOp is not Operation yCopyOp || yCopyOp.Inst != Instruction.BitwiseExclusiveOr)
    {
        return false;
    }

    // The two inputs must come from two different operations.
    if (tCopyOp == yCopyOp)
    {
        return false;
    }

    // The second XOR must be "x2 ^ y", with "y" being the very same operand used by the first XOR.
    if (yCopyOp.GetSource(0) != swappedX)
    {
        return false;
    }

    if (yCopyOp.GetSource(1) != tCopyOp.GetSource(1))
    {
        return false;
    }

    // "x2" must have exactly two recorded uses, presumably the second and third XORs,
    // since its value changes once the first XOR is turned into a copy below.
    if (swappedX.UseOps.Count != 2)
    {
        return false;
    }

    // Grab the original operands before any of the operations is modified.
    Operand originalX = tCopyOp.GetSource(0);
    Operand originalY = tCopyOp.GetSource(1);

    tCopyOp.TurnIntoCopy(originalY); // Temp = Y
    yCopyOp.TurnIntoCopy(originalX); // Y = X
    xCopyOp.TurnIntoCopy(tCopyOp.Dest); // X = Temp

    return true;
}
private static void TryEliminateBitwiseOr(Operation operation)
{
// Try to recognize and optimize those 3 patterns (in order):
// x | 0x00000000 == x, 0x00000000 | y == y,
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
// x | 0x00000000 == x, 0x00000000 | y == y,
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
Operand x = operation.GetSource(0);
Operand y = operation.GetSource(1);

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return source;
}
/// <summary>
/// Removes a node from the linked list that contains it, then clears all sources
/// and the destination of the supplied operation.
/// </summary>
/// <param name="node">Linked list node to be removed from its list</param>
/// <param name="operation">Operation whose sources and destination are set to null</param>
public static void DeleteNode(LinkedListNode<INode> node, Operation operation)
{
    // Unlink the node from whichever list currently owns it.
    LinkedList<INode> owner = node.List;
    owner.Remove(node);

    // Null out every source slot, in ascending index order.
    int index = 0;

    while (index < operation.SourcesCount)
    {
        operation.SetSource(index, null);
        index++;
    }

    // Finally clear the destination as well.
    operation.Dest = null;
}
}
}