mirror of
https://git.743378673.xyz/MeloNX/MeloNX.git
synced 2025-07-29 09:57:09 +02:00
Implement shader storage buffer operations using new Load/Store instructions (#4993)
* Implement storage buffer operations using new Load/Store instruction * Extend GenerateMultiTargetStorageOp to also match access with constant offset, and log and comments * Remove now unused code * Catch more complex cases of global memory usage * Shader cache version bump * Extend global access elimination to work with more shared memory cases * Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers * Tweak preferencing to catch more cases * Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel) * Fix storage buffer bindings * Simplify some code * Shader cache version bump * Fix typo * Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
This commit is contained in:
parent
81c9052847
commit
21c9ac6240
42 changed files with 1468 additions and 1259 deletions
File diff suppressed because it is too large
Load diff
|
@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
static class Optimizer
|
||||
{
|
||||
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
|
||||
public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config)
|
||||
{
|
||||
RunOptimizationPasses(blocks, config);
|
||||
|
||||
int sbUseMask = 0;
|
||||
int ubeUseMask = 0;
|
||||
GlobalToStorage.RunPass(hfm, blocks, config);
|
||||
|
||||
// These passes look for specific patterns and only need to run once.
|
||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||
{
|
||||
GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
|
||||
BindlessToIndexed.RunPass(blocks[blkIndex], config);
|
||||
BindlessElimination.RunPass(blocks[blkIndex], config);
|
||||
|
||||
|
@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
|
||||
|
||||
// Run optimizations one last time to remove any code that is now optimizable after above passes.
|
||||
RunOptimizationPasses(blocks, config);
|
||||
}
|
||||
|
|
|
@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
switch (operation.Inst)
|
||||
{
|
||||
case Instruction.Add:
|
||||
case Instruction.BitwiseExclusiveOr:
|
||||
TryEliminateBinaryOpCommutative(operation, 0);
|
||||
break;
|
||||
|
||||
|
@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
TryEliminateBitwiseAnd(operation);
|
||||
break;
|
||||
|
||||
case Instruction.BitwiseExclusiveOr:
|
||||
if (!TryEliminateXorSwap(operation))
|
||||
{
|
||||
TryEliminateBinaryOpCommutative(operation, 0);
|
||||
}
|
||||
break;
|
||||
|
||||
case Instruction.BitwiseOr:
|
||||
TryEliminateBitwiseOr(operation);
|
||||
break;
|
||||
|
@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
private static void TryEliminateBitwiseAnd(Operation operation)
|
||||
{
|
||||
// Try to recognize and optimize those 3 patterns (in order):
|
||||
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
|
||||
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
|
||||
// x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
|
||||
// x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
|
||||
|
||||
Operand x = operation.GetSource(0);
|
||||
Operand y = operation.GetSource(1);
|
||||
|
||||
|
@ -68,11 +75,62 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Attempts to recognize a three-instruction XOR swap ending at <paramref name="xCopyOp"/>
/// and, on a match, rewrites the three operations into plain copies.
/// </summary>
/// <param name="xCopyOp">The final XOR of the candidate sequence (the one producing x3 below).</param>
/// <returns>True if the pattern matched and the operations were rewritten, false otherwise.</returns>
private static bool TryEliminateXorSwap(Operation xCopyOp)
{
    // The XOR swap idiom exchanges two values without a temporary:
    //   x = x ^ y
    //   y = x ^ y
    //   x = x ^ y
    // which in SSA form reads:
    //   x2 = x  ^ y
    //   y2 = x2 ^ y
    //   x3 = x2 ^ y2
    // It is replaced by the equivalent, much simpler sequence of copies:
    //   temp = y
    //   y    = x
    //   x    = temp
    //
    // XOR is commutative, so the operands may show up in other orders.
    // Only this one arrangement is matched, to keep the check simple.

    Operand firstXorResult = xCopyOp.GetSource(0);  // x2 in the SSA form above.
    Operand secondXorResult = xCopyOp.GetSource(1); // y2 in the SSA form above.

    // The first source must itself have been produced by a XOR (x2 = x ^ y).
    if (firstXorResult.AsgOp is not Operation tCopyOp || tCopyOp.Inst != Instruction.BitwiseExclusiveOr)
    {
        return false;
    }

    // The second source must also have been produced by a XOR (y2 = x2 ^ y).
    if (secondXorResult.AsgOp is not Operation yCopyOp || yCopyOp.Inst != Instruction.BitwiseExclusiveOr)
    {
        return false;
    }

    // Both sources coming from the very same operation is not a swap.
    if (tCopyOp == yCopyOp)
    {
        return false;
    }

    // The second XOR must take x2 as its first input...
    if (yCopyOp.GetSource(0) != firstXorResult)
    {
        return false;
    }

    // ...and the same y that fed the first XOR as its second input.
    if (yCopyOp.GetSource(1) != tCopyOp.GetSource(1))
    {
        return false;
    }

    // x2 must have exactly two entries in its use list; in the matched pattern
    // those are the second and third XOR, so repurposing x2 affects nothing else.
    if (firstXorResult.UseOps.Count != 2)
    {
        return false;
    }

    // Grab the original inputs before any of the operations is rewritten.
    Operand originalX = tCopyOp.GetSource(0);
    Operand originalY = tCopyOp.GetSource(1);

    tCopyOp.TurnIntoCopy(originalY);    // Temp = Y
    yCopyOp.TurnIntoCopy(originalX);    // Y = X
    xCopyOp.TurnIntoCopy(tCopyOp.Dest); // X = Temp

    return true;
}
|
||||
|
||||
private static void TryEliminateBitwiseOr(Operation operation)
|
||||
{
|
||||
// Try to recognize and optimize those 3 patterns (in order):
|
||||
// x | 0x00000000 == x, 0x00000000 | y == y,
|
||||
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
|
||||
// x | 0x00000000 == x, 0x00000000 | y == y,
|
||||
// x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
|
||||
|
||||
Operand x = operation.GetSource(0);
|
||||
Operand y = operation.GetSource(1);
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
|
@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
|
||||
return source;
|
||||
}
|
||||
|
||||
/// <summary>
/// Removes <paramref name="node"/> from the linked list that currently owns it, then
/// clears every source and the destination of <paramref name="operation"/>.
/// </summary>
/// <param name="node">List node to unlink; it must currently belong to a list.</param>
/// <param name="operation">Operation whose sources and destination are set to null.</param>
public static void DeleteNode(LinkedListNode<INode> node, Operation operation)
{
    // Unlink the node from whichever list it lives in.
    LinkedList<INode> owningList = node.List;
    owningList.Remove(node);

    // Null out every source slot, in ascending index order.
    // NOTE(review): presumably this also detaches the operation from its operands'
    // use tracking - confirm against Operation.SetSource.
    int slot = 0;

    while (slot < operation.SourcesCount)
    {
        operation.SetSource(slot, null);
        slot++;
    }

    // Finally drop the destination as well.
    operation.Dest = null;
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue