Move solution and projects to src

This commit is contained in:
TSR Berry 2023-04-08 01:22:00 +02:00 committed by Mary
parent cd124bda58
commit cee7121058
3466 changed files with 55 additions and 55 deletions

View file

@ -0,0 +1,103 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using System;
using System.Linq;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Per-stage shader binding tables, built once so they can be handed to the buffer and
/// texture managers as-is.
/// </summary>
internal class CachedShaderBindings
{
    /// <summary>Texture bindings of each stage.</summary>
    public TextureBindingInfo[][] TextureBindings { get; }

    /// <summary>Image bindings of each stage.</summary>
    public TextureBindingInfo[][] ImageBindings { get; }

    /// <summary>Constant buffer descriptors of each stage.</summary>
    public BufferDescriptor[][] ConstantBufferBindings { get; }

    /// <summary>Storage buffer descriptors of each stage.</summary>
    public BufferDescriptor[][] StorageBufferBindings { get; }

    /// <summary>Highest texture binding number found on any stage, or -1 if there are no textures.</summary>
    public int MaxTextureBinding { get; }

    /// <summary>Highest image binding number found on any stage, or -1 if there are no images.</summary>
    public int MaxImageBinding { get; }

    /// <summary>
    /// Builds the binding tables for all stages of a shader program.
    /// </summary>
    /// <param name="isCompute">Whether the shader is for compute</param>
    /// <param name="stages">The stages used by the shader</param>
    public CachedShaderBindings(bool isCompute, CachedShaderStage[] stages)
    {
        // Compute uses the single stage at index 0; otherwise the first element of
        // "stages" is skipped and Constants.ShaderStages entries are consumed.
        int stageCount = isCompute ? 1 : Constants.ShaderStages;
        int firstStage = isCompute ? 0 : 1;

        TextureBindings = new TextureBindingInfo[stageCount][];
        ImageBindings = new TextureBindingInfo[stageCount][];
        ConstantBufferBindings = new BufferDescriptor[stageCount][];
        StorageBufferBindings = new BufferDescriptor[stageCount][];

        int highestTexture = -1;
        int highestImage = -1;

        for (int stageIndex = 0; stageIndex < stageCount; stageIndex++)
        {
            CachedShaderStage stage = stages[stageIndex + firstStage];

            if (stage != null)
            {
                TextureBindings[stageIndex] = BuildTextureBindings(stage, ref highestTexture);
                ImageBindings[stageIndex] = BuildImageBindings(stage, ref highestImage);
                ConstantBufferBindings[stageIndex] = stage.Info.CBuffers.ToArray();
                StorageBufferBindings[stageIndex] = stage.Info.SBuffers.ToArray();
            }
            else
            {
                // Missing stages get empty tables so consumers never see a null entry.
                TextureBindings[stageIndex] = Array.Empty<TextureBindingInfo>();
                ImageBindings[stageIndex] = Array.Empty<TextureBindingInfo>();
                ConstantBufferBindings[stageIndex] = Array.Empty<BufferDescriptor>();
                StorageBufferBindings[stageIndex] = Array.Empty<BufferDescriptor>();
            }
        }

        MaxTextureBinding = highestTexture;
        MaxImageBinding = highestImage;
    }

    /// <summary>
    /// Converts the texture descriptors of a stage into binding information,
    /// raising <paramref name="highestBinding"/> when a larger binding number is seen.
    /// </summary>
    private static TextureBindingInfo[] BuildTextureBindings(CachedShaderStage stage, ref int highestBinding)
    {
        var descriptors = stage.Info.Textures.ToArray();
        var bindings = new TextureBindingInfo[descriptors.Length];

        for (int index = 0; index < descriptors.Length; index++)
        {
            var descriptor = descriptors[index];
            Target target = ShaderTexture.GetTarget(descriptor.Type);

            bindings[index] = new TextureBindingInfo(
                target,
                descriptor.Binding,
                descriptor.CbufSlot,
                descriptor.HandleIndex,
                descriptor.Flags);

            if (descriptor.Binding > highestBinding)
            {
                highestBinding = descriptor.Binding;
            }
        }

        return bindings;
    }

    /// <summary>
    /// Converts the image descriptors of a stage into binding information,
    /// raising <paramref name="highestBinding"/> when a larger binding number is seen.
    /// </summary>
    private static TextureBindingInfo[] BuildImageBindings(CachedShaderStage stage, ref int highestBinding)
    {
        var descriptors = stage.Info.Images.ToArray();
        var bindings = new TextureBindingInfo[descriptors.Length];

        for (int index = 0; index < descriptors.Length; index++)
        {
            var descriptor = descriptors[index];
            Target target = ShaderTexture.GetTarget(descriptor.Type);
            Format format = ShaderTexture.GetFormat(descriptor.Format);

            bindings[index] = new TextureBindingInfo(
                target,
                format,
                descriptor.Binding,
                descriptor.CbufSlot,
                descriptor.HandleIndex,
                descriptor.Flags);

            if (descriptor.Binding > highestBinding)
            {
                highestBinding = descriptor.Binding;
            }
        }

        return bindings;
    }
}
}

View file

@ -0,0 +1,56 @@
using Ryujinx.Graphics.GAL;
using System;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// A host program paired with the guest shader stages it was built from:
/// several stages for graphics shaders, or a single shader for compute.
/// </summary>
class CachedShaderProgram : IDisposable
{
    /// <summary>
    /// Program object on the host side.
    /// </summary>
    public IProgram HostProgram { get; }

    /// <summary>
    /// GPU state that this version of the shader was created with.
    /// </summary>
    public ShaderSpecializationState SpecializationState { get; }

    /// <summary>
    /// Compiled shader of each stage.
    /// </summary>
    public CachedShaderStage[] Shaders { get; }

    /// <summary>
    /// Pre-built bindings, ready to be placed into the bindings manager.
    /// </summary>
    public CachedShaderBindings Bindings { get; }

    /// <summary>
    /// Creates a new shader program bundle.
    /// </summary>
    /// <param name="hostProgram">Host program with all the shader stages</param>
    /// <param name="specializationState">GPU state used to create this version of the shader</param>
    /// <param name="shaders">Shaders</param>
    public CachedShaderProgram(IProgram hostProgram, ShaderSpecializationState specializationState, params CachedShaderStage[] shaders)
    {
        HostProgram = hostProgram;
        SpecializationState = specializationState;
        Shaders = shaders;

        SpecializationState.Prepare(shaders);

        // A bundle holding exactly one shader is treated as a compute program.
        bool isCompute = shaders.Length == 1;

        Bindings = new CachedShaderBindings(isCompute, shaders);
    }

    /// <summary>
    /// Releases the host program.
    /// </summary>
    public void Dispose() => HostProgram.Dispose();
}
}

View file

@ -0,0 +1,38 @@
using Ryujinx.Graphics.Shader;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Holds the cached code and metadata of one shader stage.
/// </summary>
class CachedShaderStage
{
    /// <summary>
    /// Information about the shader program.
    /// </summary>
    public ShaderProgramInfo Info { get; }

    /// <summary>
    /// Shader code in Maxwell binary form.
    /// </summary>
    public byte[] Code { get; }

    /// <summary>
    /// Data of constant buffer 1 that the shader accesses.
    /// </summary>
    public byte[] Cb1Data { get; }

    /// <summary>
    /// Creates a new holder for the code of a single shader stage.
    /// </summary>
    /// <param name="info">Shader program information</param>
    /// <param name="code">Maxwell binary shader code</param>
    /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
    public CachedShaderStage(ShaderProgramInfo info, byte[] code, byte[] cb1Data)
    {
        (Info, Code, Cb1Data) = (info, code, cb1Data);
    }
}
}

View file

@ -0,0 +1,70 @@
using Ryujinx.Graphics.Gpu.Shader.HashTable;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Hash table used to look up cached compute shader programs by their guest code.
/// </summary>
class ComputeShaderCacheHashTable
{
    private readonly PartitionedHashTable<ShaderSpecializationList> _cache;
    private readonly List<CachedShaderProgram> _shaderPrograms;

    /// <summary>
    /// Creates an empty compute shader cache hash table.
    /// </summary>
    public ComputeShaderCacheHashTable()
    {
        _shaderPrograms = new List<CachedShaderProgram>();
        _cache = new PartitionedHashTable<ShaderSpecializationList>();
    }

    /// <summary>
    /// Registers a program on the cache.
    /// </summary>
    /// <param name="program">Program to be added</param>
    public void Add(CachedShaderProgram program)
    {
        // Compute programs are keyed by the code of their first shader.
        byte[] guestCode = program.Shaders[0].Code;

        var specializations = _cache.GetOrAdd(guestCode, new ShaderSpecializationList());

        specializations.Add(program);
        _shaderPrograms.Add(program);
    }

    /// <summary>
    /// Looks for a cached program matching the guest code at the given address and the current state.
    /// </summary>
    /// <param name="channel">GPU channel</param>
    /// <param name="poolState">Texture pool state</param>
    /// <param name="computeState">Compute state</param>
    /// <param name="gpuVa">GPU virtual address of the compute shader</param>
    /// <param name="program">Cached host program for the given state, if found</param>
    /// <param name="cachedGuestCode">Cached guest code, if any found</param>
    /// <returns>True if a cached host program was found, false otherwise</returns>
    public bool TryFind(
        GpuChannel channel,
        GpuChannelPoolState poolState,
        GpuChannelComputeState computeState,
        ulong gpuVa,
        out CachedShaderProgram program,
        out byte[] cachedGuestCode)
    {
        var codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);

        if (!_cache.TryFindItem(codeAccessor, out var specializations, out cachedGuestCode))
        {
            program = null;

            return false;
        }

        return specializations.TryFindForCompute(channel, poolState, computeState, out program);
    }

    /// <summary>
    /// Enumerates every program that was added to the table.
    /// </summary>
    /// <returns>Programs added to the table</returns>
    public IEnumerable<CachedShaderProgram> GetPrograms()
    {
        foreach (CachedShaderProgram cachedProgram in _shaderPrograms)
        {
            yield return cachedProgram;
        }
    }
}
}

View file

@ -0,0 +1,138 @@
using Ryujinx.Common;
using Ryujinx.Common.Logging;
using System;
using System.IO;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Represents a background disk cache writer.
/// </summary>
/// <remarks>
/// Write requests are queued on an <see cref="AsyncWorkQueue{T}"/> and handed to
/// <see cref="DiskCacheHostStorage.AddShader"/> by the queue's worker.
/// </remarks>
class BackgroundDiskCacheWriter : IDisposable
{
    /// <summary>
    /// Possible operation to do on the <see cref="_fileWriterWorkerQueue"/>.
    /// </summary>
    private enum CacheFileOperation
    {
        /// <summary>
        /// Operation to add a shader to the cache.
        /// </summary>
        AddShader
    }

    /// <summary>
    /// Represents an operation to perform on the <see cref="_fileWriterWorkerQueue"/>.
    /// </summary>
    private readonly struct CacheFileOperationTask
    {
        /// <summary>
        /// The type of operation to perform.
        /// </summary>
        public readonly CacheFileOperation Type;

        /// <summary>
        /// The data associated to this operation or null.
        /// </summary>
        public readonly object Data;

        /// <summary>
        /// Creates a new operation task.
        /// </summary>
        /// <param name="type">The type of operation to perform</param>
        /// <param name="data">The data associated to the operation, or null</param>
        public CacheFileOperationTask(CacheFileOperation type, object data)
        {
            Type = type;
            Data = data;
        }
    }

    /// <summary>
    /// Background shader cache write information.
    /// </summary>
    private readonly struct AddShaderData
    {
        /// <summary>
        /// Cached shader program.
        /// </summary>
        public readonly CachedShaderProgram Program;

        /// <summary>
        /// Binary host code.
        /// </summary>
        public readonly byte[] HostCode;

        /// <summary>
        /// Creates a new background shader cache write information.
        /// </summary>
        /// <param name="program">Cached shader program</param>
        /// <param name="hostCode">Binary host code</param>
        public AddShaderData(CachedShaderProgram program, byte[] hostCode)
        {
            Program = program;
            HostCode = hostCode;
        }
    }

    private readonly GpuContext _context;
    private readonly DiskCacheHostStorage _hostStorage;
    private readonly AsyncWorkQueue<CacheFileOperationTask> _fileWriterWorkerQueue;

    /// <summary>
    /// Creates a new background disk cache writer.
    /// </summary>
    /// <param name="context">GPU context</param>
    /// <param name="hostStorage">Disk cache host storage</param>
    public BackgroundDiskCacheWriter(GpuContext context, DiskCacheHostStorage hostStorage)
    {
        _context = context;
        _hostStorage = hostStorage;
        _fileWriterWorkerQueue = new AsyncWorkQueue<CacheFileOperationTask>(ProcessTask, "GPU.BackgroundDiskCacheWriter");
    }

    /// <summary>
    /// Processes a shader cache background operation.
    /// </summary>
    /// <param name="task">Task to process</param>
    private void ProcessTask(CacheFileOperationTask task)
    {
        switch (task.Type)
        {
            case CacheFileOperation.AddShader:
                AddShaderData data = (AddShaderData)task.Data;

                try
                {
                    _hostStorage.AddShader(_context, data.Program, data.HostCode);
                }
                catch (DiskCacheLoadException diskCacheLoadException)
                {
                    // A failed write is logged and dropped; the exception does not leave this method.
                    Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {diskCacheLoadException.Message}");
                }
                catch (IOException ioException)
                {
                    Logger.Error?.Print(LogClass.Gpu, $"Error writing shader to disk cache. {ioException.Message}");
                }

                break;
        }
    }

    /// <summary>
    /// Adds a shader program to be cached in the background.
    /// </summary>
    /// <param name="program">Shader program to cache</param>
    /// <param name="hostCode">Host binary code of the program</param>
    public void AddShader(CachedShaderProgram program, byte[] hostCode)
    {
        _fileWriterWorkerQueue.Add(new CacheFileOperationTask(CacheFileOperation.AddShader, new AddShaderData(program, hostCode)));
    }

    /// <summary>
    /// Disposes the background work queue.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);

        // The type is not sealed: a derived class may add a finalizer, which must not run
        // again after an explicit dispose (CA1816).
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases the resources held by the writer.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>; the work queue is only disposed in that case</param>
    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            _fileWriterWorkerQueue.Dispose();
        }
    }
}
}

View file

@ -0,0 +1,216 @@
using System;
using System.IO;
using System.IO.Compression;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Binary data serializer.
/// </summary>
/// <remarks>
/// This is a mutable struct: <see cref="BeginCompression()"/>, <see cref="BeginCompression(CompressionAlgorithm)"/>
/// and <see cref="EndCompression"/> replace the stream that reads and writes go through,
/// so a copy of the struct does not observe changes made through another copy.
/// </remarks>
struct BinarySerializer
{
    // Stream supplied by the caller; never replaced.
    private readonly Stream _stream;

    // Stream that reads and writes currently go through: either _stream itself,
    // or a DeflateStream wrapping it while compression is active.
    private Stream _activeStream;

    /// <summary>
    /// Creates a new binary serializer.
    /// </summary>
    /// <param name="stream">Stream to read from or write into</param>
    public BinarySerializer(Stream stream)
    {
        _stream = stream;
        _activeStream = stream;
    }

    /// <summary>
    /// Fills the whole buffer with data from the stream.
    /// </summary>
    /// <param name="stream">Stream to read from</param>
    /// <param name="buffer">Buffer to fill</param>
    /// <exception cref="DiskCacheLoadException">The stream ended before the buffer was filled</exception>
    private static void ReadFully(Stream stream, Span<byte> buffer)
    {
        while (!buffer.IsEmpty)
        {
            int count = stream.Read(buffer);

            // Stream.Read returns 0 once the end of the stream is reached. Without this check
            // a truncated file would make the loop spin forever.
            if (count <= 0)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            buffer = buffer.Slice(count);
        }
    }

    /// <summary>
    /// Reads data from the stream.
    /// </summary>
    /// <typeparam name="T">Type of the data</typeparam>
    /// <param name="data">Data read</param>
    /// <exception cref="DiskCacheLoadException">The stream ended before all the data could be read</exception>
    public void Read<T>(ref T data) where T : unmanaged
    {
        Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));

        ReadFully(_activeStream, buffer);
    }

    /// <summary>
    /// Tries to read data from the stream.
    /// </summary>
    /// <typeparam name="T">Type of the data</typeparam>
    /// <param name="data">Data read</param>
    /// <returns>True if the read was successful, false otherwise</returns>
    /// <exception cref="DiskCacheLoadException">A compressed stream ended before all the data could be read</exception>
    public bool TryRead<T>(ref T data) where T : unmanaged
    {
        // Length is unknown on compressed streams.
        if (_activeStream == _stream)
        {
            int size = Unsafe.SizeOf<T>();

            if (_activeStream.Length - _activeStream.Position < size)
            {
                return false;
            }
        }

        Read(ref data);

        return true;
    }

    /// <summary>
    /// Reads data prefixed with a magic and size from the stream.
    /// </summary>
    /// <typeparam name="T">Type of the data</typeparam>
    /// <param name="data">Data read</param>
    /// <param name="magic">Expected magic value, for validation</param>
    /// <exception cref="DiskCacheLoadException">
    /// The magic does not match, the stored size is negative or larger than <typeparamref name="T"/>,
    /// or the stream ended before all the data could be read
    /// </exception>
    public void ReadWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
    {
        uint actualMagic = 0;
        int size = 0;

        Read(ref actualMagic);
        Read(ref size);

        if (actualMagic != magic)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidMagic);
        }

        // Structs are expected to expand but not shrink between versions.
        // A negative size can only come from a corrupted file and is rejected here as well,
        // instead of escaping from Slice as an ArgumentOutOfRangeException.
        if (size < 0 || size > Unsafe.SizeOf<T>())
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedInvalidLength);
        }

        // Only the first "size" bytes are overwritten; the rest of data keeps its current value.
        Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1)).Slice(0, size);

        ReadFully(_activeStream, buffer);
    }

    /// <summary>
    /// Writes data into the stream.
    /// </summary>
    /// <typeparam name="T">Type of the data</typeparam>
    /// <param name="data">Data to be written</param>
    public void Write<T>(ref T data) where T : unmanaged
    {
        Span<byte> buffer = MemoryMarshal.Cast<T, byte>(MemoryMarshal.CreateSpan(ref data, 1));

        _activeStream.Write(buffer);
    }

    /// <summary>
    /// Writes data prefixed with a magic and size into the stream.
    /// </summary>
    /// <typeparam name="T">Type of the data</typeparam>
    /// <param name="data">Data to write</param>
    /// <param name="magic">Magic value to write</param>
    public void WriteWithMagicAndSize<T>(ref T data, uint magic) where T : unmanaged
    {
        int size = Unsafe.SizeOf<T>();

        Write(ref magic);
        Write(ref size);
        Write(ref data);
    }

    /// <summary>
    /// Indicates that all data that will be read from the stream has been compressed.
    /// </summary>
    /// <remarks>
    /// The algorithm is read from the stream. Any value other than
    /// <see cref="CompressionAlgorithm.Deflate"/> leaves the stream uncompressed.
    /// </remarks>
    public void BeginCompression()
    {
        CompressionAlgorithm algorithm = CompressionAlgorithm.None;

        Read(ref algorithm);

        if (algorithm == CompressionAlgorithm.Deflate)
        {
            // leaveOpen: the underlying stream must stay usable after EndCompression.
            _activeStream = new DeflateStream(_stream, CompressionMode.Decompress, true);
        }
    }

    /// <summary>
    /// Indicates that all data that will be written into the stream should be compressed.
    /// </summary>
    /// <param name="algorithm">Compression algorithm that should be used</param>
    public void BeginCompression(CompressionAlgorithm algorithm)
    {
        Write(ref algorithm);

        if (algorithm == CompressionAlgorithm.Deflate)
        {
            // leaveOpen: the underlying stream must stay usable after EndCompression.
            _activeStream = new DeflateStream(_stream, CompressionLevel.SmallestSize, true);
        }
    }

    /// <summary>
    /// Indicates the end of a compressed chunk.
    /// </summary>
    /// <remarks>
    /// Any data written after this will not be compressed unless <see cref="BeginCompression(CompressionAlgorithm)"/> is called again.
    /// Any data read after this will be assumed to be uncompressed unless <see cref="BeginCompression"/> is called again.
    /// </remarks>
    public void EndCompression()
    {
        if (_activeStream != _stream)
        {
            _activeStream.Dispose();
            _activeStream = _stream;
        }
    }

    /// <summary>
    /// Reads compressed data from the stream.
    /// </summary>
    /// <remarks>
    /// <paramref name="data"/> must have the exact length of the uncompressed data,
    /// otherwise decompression will fail.
    /// </remarks>
    /// <param name="stream">Stream to read from</param>
    /// <param name="data">Buffer to write the uncompressed data into</param>
    /// <exception cref="DiskCacheLoadException">
    /// The stream ended before the buffer was filled, or the stored compression algorithm is not known
    /// </exception>
    public static void ReadCompressed(Stream stream, Span<byte> data)
    {
        // ReadByte returns -1 at the end of the stream; that must not be mistaken for an algorithm value.
        int algorithmByte = stream.ReadByte();

        if (algorithmByte < 0)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }

        switch ((CompressionAlgorithm)algorithmByte)
        {
            case CompressionAlgorithm.None:
                ReadFully(stream, data);
                break;
            case CompressionAlgorithm.Deflate:
                using (DeflateStream deflateStream = new DeflateStream(stream, CompressionMode.Decompress, true))
                {
                    ReadFully(deflateStream, data);
                }
                break;
            default:
                // Returning here would hand back a buffer that was never filled.
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }
    }

    /// <summary>
    /// Compresses and writes the compressed data into the stream.
    /// </summary>
    /// <param name="stream">Stream to write into</param>
    /// <param name="data">Data to compress</param>
    /// <param name="algorithm">Compression algorithm to be used</param>
    public static void WriteCompressed(Stream stream, ReadOnlySpan<byte> data, CompressionAlgorithm algorithm)
    {
        stream.WriteByte((byte)algorithm);

        switch (algorithm)
        {
            case CompressionAlgorithm.None:
                stream.Write(data);
                break;
            case CompressionAlgorithm.Deflate:
                // Disposing the DeflateStream flushes the compressed data; "using" makes sure
                // that also happens when Write throws.
                using (DeflateStream deflateStream = new DeflateStream(stream, CompressionLevel.SmallestSize, true))
                {
                    deflateStream.Write(data);
                }
                break;
        }
    }
}
}

View file

@ -0,0 +1,18 @@
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Compression algorithms that cache data can be stored with.
/// </summary>
enum CompressionAlgorithm : byte
{
    /// <summary>
    /// The data is stored as-is, without compression.
    /// </summary>
    None = 0,

    /// <summary>
    /// The data is compressed with Deflate (RFC 1951).
    /// </summary>
    Deflate = 1
}
}

View file

@ -0,0 +1,57 @@
using Ryujinx.Common.Logging;
using System.IO;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Common disk cache utility methods.
/// </summary>
static class DiskCacheCommon
{
    /// <summary>
    /// Opens a file for read or write.
    /// </summary>
    /// <remarks>
    /// Writable files are opened with <see cref="FileMode.OpenOrCreate"/> and read/write access,
    /// read-only files with <see cref="FileMode.Open"/>. Other processes may only read the file
    /// while it is open.
    /// </remarks>
    /// <param name="basePath">Base path of the file (should not include the file name)</param>
    /// <param name="fileName">Name of the file</param>
    /// <param name="writable">Indicates if the file will be read or written</param>
    /// <returns>File stream</returns>
    /// <exception cref="DiskCacheLoadException">The file could not be opened</exception>
    public static FileStream OpenFile(string basePath, string fileName, bool writable)
    {
        string fullPath = Path.Combine(basePath, fileName);

        FileMode mode = writable ? FileMode.OpenOrCreate : FileMode.Open;
        FileAccess access = writable ? FileAccess.ReadWrite : FileAccess.Read;

        try
        {
            return new FileStream(fullPath, mode, access, FileShare.Read);
        }
        catch (System.Exception exception) when (exception is IOException || exception is System.UnauthorizedAccessException)
        {
            // FileStream reports a denied access (for example a read-only file opened for writing)
            // with UnauthorizedAccessException, which does not derive from IOException.
            // Both are mapped to the same "no access" result.
            Logger.Error?.Print(LogClass.Gpu, $"Could not access file \"{fullPath}\". {exception.Message}");

            throw new DiskCacheLoadException(DiskCacheLoadResult.NoAccess);
        }
    }

    /// <summary>
    /// Gets the compression algorithm that should be used when writing the disk cache.
    /// </summary>
    /// <returns>Compression algorithm</returns>
    public static CompressionAlgorithm GetCompressionAlgorithm()
    {
        return CompressionAlgorithm.Deflate;
    }
}
}

View file

@ -0,0 +1,266 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// GPU state and memory accessor that answers shader translator queries from a cached
/// specialization state and cached shader data.
/// </summary>
class DiskCacheGpuAccessor : GpuAccessorBase, IGpuAccessor
{
    private readonly ReadOnlyMemory<byte> _data;
    private readonly ReadOnlyMemory<byte> _cb1Data;
    private readonly ShaderSpecializationState _oldSpecState;
    private readonly ShaderSpecializationState _newSpecState;
    private readonly int _stageIndex;
    private readonly bool _isVulkan;
    private readonly ResourceCounts _resourceCounts;

    /// <summary>
    /// Creates a new cached GPU state accessor for shader translation.
    /// </summary>
    /// <param name="context">GPU context</param>
    /// <param name="data">The data of the shader</param>
    /// <param name="cb1Data">The constant buffer 1 data of the shader</param>
    /// <param name="oldSpecState">Shader specialization state of the cached shader</param>
    /// <param name="newSpecState">Shader specialization state of the recompiled shader</param>
    /// <param name="counts">Resource counts, also passed to the base accessor</param>
    /// <param name="stageIndex">Shader stage index</param>
    public DiskCacheGpuAccessor(
        GpuContext context,
        ReadOnlyMemory<byte> data,
        ReadOnlyMemory<byte> cb1Data,
        ShaderSpecializationState oldSpecState,
        ShaderSpecializationState newSpecState,
        ResourceCounts counts,
        int stageIndex) : base(context, counts, stageIndex)
    {
        _data = data;
        _cb1Data = cb1Data;
        _oldSpecState = oldSpecState;
        _newSpecState = newSpecState;
        _stageIndex = stageIndex;
        _resourceCounts = counts;
        _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
    }

    /// <inheritdoc/>
    public uint ConstantBuffer1Read(int offset)
    {
        int endOffset = offset + sizeof(uint);

        if (endOffset > _cb1Data.Length)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.InvalidCb1DataLength);
        }

        ReadOnlySpan<byte> tail = _cb1Data.Span.Slice(offset);

        return MemoryMarshal.Cast<byte, uint>(tail)[0];
    }

    /// <inheritdoc/>
    public void Log(string message) => Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}");

    /// <inheritdoc/>
    public ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize)
    {
        // The address is used as a byte offset into the cached shader data.
        ReadOnlySpan<byte> code = _data.Span.Slice((int)address);

        return MemoryMarshal.Cast<byte, ulong>(code);
    }

    /// <inheritdoc/>
    public bool QueryAlphaToCoverageDitherEnable() =>
        _oldSpecState.GraphicsState.AlphaToCoverageEnable && _oldSpecState.GraphicsState.AlphaToCoverageDitherEnable;

    /// <inheritdoc/>
    public AlphaTestOp QueryAlphaTestCompare()
    {
        if (_isVulkan && _oldSpecState.GraphicsState.AlphaTestEnable)
        {
            return _oldSpecState.GraphicsState.AlphaTestCompare switch
            {
                CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never,
                CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less,
                CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal,
                CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual,
                CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater,
                CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual,
                CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual,
                _ => AlphaTestOp.Always
            };
        }

        // Alpha test disabled, or not running on Vulkan.
        return AlphaTestOp.Always;
    }

    /// <inheritdoc/>
    public float QueryAlphaTestReference()
    {
        return _oldSpecState.GraphicsState.AlphaTestReference;
    }

    /// <inheritdoc/>
    public AttributeType QueryAttributeType(int location) => _oldSpecState.GraphicsState.AttributeTypes[location];

    /// <inheritdoc/>
    public AttributeType QueryFragmentOutputType(int location) => _oldSpecState.GraphicsState.FragmentOutputTypes[location];

    /// <inheritdoc/>
    public int QueryComputeLocalSizeX()
    {
        return _oldSpecState.ComputeState.LocalSizeX;
    }

    /// <inheritdoc/>
    public int QueryComputeLocalSizeY()
    {
        return _oldSpecState.ComputeState.LocalSizeY;
    }

    /// <inheritdoc/>
    public int QueryComputeLocalSizeZ()
    {
        return _oldSpecState.ComputeState.LocalSizeZ;
    }

    /// <inheritdoc/>
    public int QueryComputeLocalMemorySize()
    {
        return _oldSpecState.ComputeState.LocalMemorySize;
    }

    /// <inheritdoc/>
    public int QueryComputeSharedMemorySize()
    {
        return _oldSpecState.ComputeState.SharedMemorySize;
    }

    /// <inheritdoc/>
    public uint QueryConstantBufferUse()
    {
        // Copy the recorded use into the new state before reporting it.
        _newSpecState.RecordConstantBufferUse(_stageIndex, _oldSpecState.ConstantBufferUse[_stageIndex]);

        return _oldSpecState.ConstantBufferUse[_stageIndex];
    }

    /// <inheritdoc/>
    public bool QueryHasConstantBufferDrawParameters() => _oldSpecState.GraphicsState.HasConstantBufferDrawParameters;

    /// <inheritdoc/>
    public bool QueryDualSourceBlendEnable() => _oldSpecState.GraphicsState.DualSourceBlendEnable;

    /// <inheritdoc/>
    public InputTopology QueryPrimitiveTopology()
    {
        _newSpecState.RecordPrimitiveTopology();

        return ConvertToInputTopology(
            _oldSpecState.GraphicsState.Topology,
            _oldSpecState.GraphicsState.TessellationMode);
    }

    /// <inheritdoc/>
    public bool QueryProgramPointSize() => _oldSpecState.GraphicsState.ProgramPointSizeEnable;

    /// <inheritdoc/>
    public float QueryPointSize() => _oldSpecState.GraphicsState.PointSize;

    /// <inheritdoc/>
    public bool QueryTessCw() => _oldSpecState.GraphicsState.TessellationMode.UnpackCw();

    /// <inheritdoc/>
    public TessPatchType QueryTessPatchType() => _oldSpecState.GraphicsState.TessellationMode.UnpackPatchType();

    /// <inheritdoc/>
    public TessSpacing QueryTessSpacing() => _oldSpecState.GraphicsState.TessellationMode.UnpackSpacing();

    /// <inheritdoc/>
    public TextureFormat QueryTextureFormat(int handle, int cbufSlot)
    {
        _newSpecState.RecordTextureFormat(_stageIndex, handle, cbufSlot);

        (uint rawFormat, bool isSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot);

        return ConvertToTextureFormat(rawFormat, isSrgb);
    }

    /// <inheritdoc/>
    public SamplerType QuerySamplerType(int handle, int cbufSlot)
    {
        _newSpecState.RecordTextureSamplerType(_stageIndex, handle, cbufSlot);

        return _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot).ConvertSamplerType();
    }

    /// <inheritdoc/>
    public bool QueryTextureCoordNormalized(int handle, int cbufSlot)
    {
        _newSpecState.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot);

        return _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot);
    }

    /// <inheritdoc/>
    public bool QueryTransformDepthMinusOneToOne() => _oldSpecState.GraphicsState.DepthMode;

    /// <inheritdoc/>
    public bool QueryTransformFeedbackEnabled() => _oldSpecState.TransformFeedbackDescriptors != null;

    /// <inheritdoc/>
    public ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex) =>
        _oldSpecState.TransformFeedbackDescriptors[bufferIndex].AsSpan();

    /// <inheritdoc/>
    public int QueryTransformFeedbackStride(int bufferIndex) =>
        _oldSpecState.TransformFeedbackDescriptors[bufferIndex].Stride;

    /// <inheritdoc/>
    public bool QueryEarlyZForce()
    {
        _newSpecState.RecordEarlyZForce();

        return _oldSpecState.GraphicsState.EarlyZForce;
    }

    /// <inheritdoc/>
    public bool QueryHasUnalignedStorageBuffer() =>
        _oldSpecState.GraphicsState.HasUnalignedStorageBuffer || _oldSpecState.ComputeState.HasUnalignedStorageBuffer;

    /// <inheritdoc/>
    public bool QueryViewportTransformDisable() => _oldSpecState.GraphicsState.ViewportTransformDisable;

    /// <inheritdoc/>
    public void RegisterTexture(int handle, int cbufSlot)
    {
        bool knownTexture = _oldSpecState.TextureRegistered(_stageIndex, handle, cbufSlot);

        if (!knownTexture)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.MissingTextureDescriptor);
        }

        // Carry the texture information of the cached state over to the new state.
        (uint rawFormat, bool isSrgb) = _oldSpecState.GetFormat(_stageIndex, handle, cbufSlot);
        TextureTarget textureTarget = _oldSpecState.GetTextureTarget(_stageIndex, handle, cbufSlot);
        bool normalizedCoords = _oldSpecState.GetCoordNormalized(_stageIndex, handle, cbufSlot);

        _newSpecState.RegisterTexture(_stageIndex, handle, cbufSlot, rawFormat, isSrgb, textureTarget, normalizedCoords);
    }
}
}

View file

@ -0,0 +1,459 @@
using Ryujinx.Common;
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// On-disk shader cache storage for guest code.
/// </summary>
class DiskCacheGuestStorage
{
// The ASCII characters 'T', 'O', 'C', 'G' packed into 32 bits, 'T' in the lowest byte.
// NOTE(review): presumably the value stored in TocHeader.Magic; the code using it is not visible here — confirm.
private const uint TocMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'G' << 24);
// File format version, as separate major and minor parts.
private const ushort VersionMajor = 1;
private const ushort VersionMinor = 1;
// Both version parts in one value: major in the upper 16 bits, minor in the lower 16 bits.
private const uint VersionPacked = ((uint)VersionMajor << 16) | VersionMinor;
// Names of the TOC and data files; they are combined with the base path to get the full path.
private const string TocFileName = "guest.toc";
private const string DataFileName = "guest.data";
// Directory where the guest cache files are located.
private readonly string _basePath;
/// <summary>
/// TOC (Table of contents) file header.
/// </summary>
/// <remarks>
/// The size of this struct is used as the file offset of the first <see cref="TocEntry"/>
/// when entries are loaded, so adding, removing or resizing fields changes where entries are looked up.
/// </remarks>
private struct TocHeader
{
/// <summary>
/// Magic value, for validation and identification purposes.
/// </summary>
public uint Magic;
/// <summary>
/// File format version.
/// </summary>
public uint Version;
/// <summary>
/// Header padding.
/// </summary>
public uint Padding;
/// <summary>
/// Number of modifications to the file, also the shaders count.
/// </summary>
public uint ModificationsCount;
/// <summary>
/// Reserved space, to be used in the future. Write as zero.
/// </summary>
public ulong Reserved;
/// <summary>
/// Reserved space, to be used in the future. Write as zero.
/// </summary>
public ulong Reserved2;
}
/// <summary>
/// TOC (Table of contents) file entry.
/// </summary>
/// <remarks>
/// Entries are read from the TOC file as raw bytes directly into this struct,
/// so the order and size of the fields define the layout of an entry on the file.
/// </remarks>
private struct TocEntry
{
/// <summary>
/// Offset of the data on the data file.
/// </summary>
public uint Offset;
/// <summary>
/// Code size.
/// </summary>
public uint CodeSize;
/// <summary>
/// Constant buffer 1 data size.
/// </summary>
public uint Cb1DataSize;
/// <summary>
/// Hash of the code and constant buffer data.
/// </summary>
public uint Hash;
}
/// <summary>
/// In-memory version of a TOC (Table of contents) entry.
/// </summary>
private struct TocMemoryEntry
{
    /// <summary>
    /// Where the data starts on the data file.
    /// </summary>
    public uint Offset;

    /// <summary>
    /// Size of the code.
    /// </summary>
    public uint CodeSize;

    /// <summary>
    /// Size of the constant buffer 1 data.
    /// </summary>
    public uint Cb1DataSize;

    /// <summary>
    /// Index that the shader has on the cache.
    /// </summary>
    public readonly int Index;

    /// <summary>
    /// Creates a new in-memory TOC entry.
    /// </summary>
    /// <param name="offset">Offset of the data on the data file</param>
    /// <param name="codeSize">Code size</param>
    /// <param name="cb1DataSize">Constant buffer 1 data size</param>
    /// <param name="index">Index of the shader on the cache</param>
    public TocMemoryEntry(uint offset, uint codeSize, uint cb1DataSize, int index)
    {
        (Offset, CodeSize, Cb1DataSize, Index) = (offset, codeSize, cb1DataSize, index);
    }
}
// In-memory TOC: maps a content hash to the list of entries that share that hash.
private Dictionary<uint, List<TocMemoryEntry>> _toc;
// NOTE(review): presumably mirrors TocHeader.ModificationsCount; the code that maintains it is not visible here — confirm.
private uint _tocModificationsCount;
// Memory cache of loaded shaders, indexed by shader index. Each item is (guest code, constant buffer 1 data);
// null until the first load, and again after ClearMemoryCache.
private (byte[], byte[])[] _cache;
/// <summary>
/// Creates a guest storage that keeps its files under the given directory.
/// </summary>
/// <param name="basePath">Base path of the disk shader cache</param>
public DiskCacheGuestStorage(string basePath) => _basePath = basePath;
/// <summary>
/// Tells whether the guest cache TOC (table of contents) file is present on disk.
/// </summary>
/// <returns>True if the file exists, false otherwise</returns>
public bool TocFileExists()
{
    string tocPath = Path.Combine(_basePath, TocFileName);

    return File.Exists(tocPath);
}
/// <summary>
/// Tells whether the guest cache data file is present on disk.
/// </summary>
/// <returns>True if the file exists, false otherwise</returns>
public bool DataFileExists()
{
    string dataPath = Path.Combine(_basePath, DataFileName);

    return File.Exists(dataPath);
}
/// <summary>
/// Opens the TOC (table of contents) file of the guest cache for reading.
/// </summary>
/// <returns>File stream</returns>
public Stream OpenTocFileStream() => DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: false);
/// <summary>
/// Opens the data file of the guest cache for reading.
/// </summary>
/// <returns>File stream</returns>
public Stream OpenDataFileStream() => DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: false);
/// <summary>
/// Truncates both guest cache files to zero length.
/// </summary>
public void ClearCache()
{
    // Both files are opened before either one is truncated.
    using (var tocFile = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true))
    {
        using (var dataFile = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true))
        {
            tocFile.SetLength(0);
            dataFile.SetLength(0);
        }
    }
}
/// <summary>
/// Loads the guest cache from file or memory cache.
/// </summary>
/// <param name="tocFileStream">Guest TOC file stream</param>
/// <param name="dataFileStream">Guest data file stream</param>
/// <param name="index">Guest shader index</param>
/// <returns>Guest code and constant buffer 1 data</returns>
/// <exception cref="DiskCacheLoadException">
/// The TOC has no complete entry for <paramref name="index"/>, the entry points outside the data file,
/// or the data file ends before the constant buffer 1 data could be read
/// </exception>
public GuestCodeAndCbData LoadShader(Stream tocFileStream, Stream dataFileStream, int index)
{
    if (_cache == null || index >= _cache.Length)
    {
        _cache = new (byte[], byte[])[Math.Max(index + 1, GetShadersCountFromLength(tocFileStream.Length))];
    }

    (byte[] guestCode, byte[] cb1Data) = _cache[index];

    if (guestCode == null || cb1Data == null)
    {
        BinarySerializer tocReader = new BinarySerializer(tocFileStream);

        // Entries are stored back to back, right after the header.
        tocFileStream.Seek(Unsafe.SizeOf<TocHeader>() + index * Unsafe.SizeOf<TocEntry>(), SeekOrigin.Begin);

        TocEntry entry = new TocEntry();

        // TryRead checks that enough bytes remain before reading, so a TOC that is too short
        // for this index is reported as corruption instead of being read past its end.
        if (!tocReader.TryRead(ref entry))
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }

        // Validate the offset before allocating buffers sized from the (untrusted) entry.
        if (entry.Offset >= (ulong)dataFileStream.Length)
        {
            throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
        }

        guestCode = new byte[entry.CodeSize];
        cb1Data = new byte[entry.Cb1DataSize];

        dataFileStream.Seek((long)entry.Offset, SeekOrigin.Begin);

        // The constant buffer 1 data is stored uncompressed, followed by the compressed code.
        // Stream.Read may return fewer bytes than requested, so keep reading until the buffer
        // is full and treat an early end of file as corruption.
        Span<byte> remaining = cb1Data;

        while (!remaining.IsEmpty)
        {
            int count = dataFileStream.Read(remaining);

            if (count <= 0)
            {
                throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
            }

            remaining = remaining.Slice(count);
        }

        BinarySerializer.ReadCompressed(dataFileStream, guestCode);

        _cache[index] = (guestCode, cb1Data);
    }

    return new GuestCodeAndCbData(guestCode, cb1Data);
}
/// <summary>
/// Drops the in-memory guest code cache, so that subsequent loads read from the files again.
/// </summary>
public void ClearMemoryCache() => _cache = null;
/// <summary>
/// Derives the number of guest shader entries from the length of the TOC file.
/// </summary>
/// <param name="length">TOC file length</param>
/// <returns>Shaders count</returns>
private static int GetShadersCountFromLength(long length)
{
    // Everything after the header is a sequence of fixed-size entries.
    long entriesSize = length - Unsafe.SizeOf<TocHeader>();

    return (int)(entriesSize / Unsafe.SizeOf<TocEntry>());
}
/// <summary>
/// Adds a guest shader to the cache.
/// </summary>
/// <remarks>
/// When an identical shader (same code and constant buffer 1 data) is already stored,
/// its existing index is returned and nothing is written.
/// </remarks>
/// <param name="data">Guest code</param>
/// <param name="cb1Data">Constant buffer 1 data accessed by the code</param>
/// <returns>Index of the shader on the cache</returns>
public int AddShader(ReadOnlySpan<byte> data, ReadOnlySpan<byte> cb1Data)
{
    using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, TocFileName, writable: true);
    using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, DataFileName, writable: true);

    TocHeader header = new TocHeader();
    LoadOrCreateToc(tocFileStream, ref header);

    uint hash = CalcHash(data, cb1Data);

    if (_toc.TryGetValue(hash, out var candidates))
    {
        foreach (var candidate in candidates)
        {
            bool sameSizes = data.Length == candidate.CodeSize && cb1Data.Length == candidate.Cb1DataSize;

            if (sameSizes)
            {
                // Hashes can collide, so compare the stored bytes before reusing the entry.
                dataFileStream.Seek((long)candidate.Offset, SeekOrigin.Begin);

                byte[] storedCode = new byte[candidate.CodeSize];
                byte[] storedCb1Data = new byte[candidate.Cb1DataSize];

                dataFileStream.Read(storedCb1Data);
                BinarySerializer.ReadCompressed(dataFileStream, storedCode);

                if (data.SequenceEqual(storedCode) && cb1Data.SequenceEqual(storedCb1Data))
                {
                    return candidate.Index;
                }
            }
        }
    }

    return WriteNewEntry(tocFileStream, dataFileStream, ref header, data, cb1Data, hash);
}
/// <summary>
/// Loads the guest cache TOC file, or create a new one if not present.
/// </summary>
/// <remarks>
/// The in-memory TOC is only reloaded when it has not been loaded yet, or when the modifications
/// count on the file header differs from the one seen on the last load/write.
/// </remarks>
/// <param name="tocFileStream">Guest TOC file stream</param>
/// <param name="header">Set to the TOC file header</param>
private void LoadOrCreateToc(Stream tocFileStream, ref TocHeader header)
{
    BinarySerializer reader = new BinarySerializer(tocFileStream);

    // Missing, foreign or incompatible header: start over with a fresh TOC.
    if (!reader.TryRead(ref header) || header.Magic != TocMagic || header.Version != VersionPacked)
    {
        CreateToc(tocFileStream, ref header);
    }

    if (_toc == null || header.ModificationsCount != _tocModificationsCount)
    {
        if (!LoadTocEntries(tocFileStream, ref reader))
        {
            CreateToc(tocFileStream, ref header);

            // The file was just recreated without entries. Discard whatever was loaded before the
            // read failure, otherwise the memory TOC would reference indices that no longer exist on the file.
            _toc.Clear();
        }

        _tocModificationsCount = header.ModificationsCount;
    }
}
/// <summary>
/// Resets the guest cache TOC file so that it only contains a freshly initialized header.
/// </summary>
/// <param name="tocFileStream">Guest TOC file stream</param>
/// <param name="header">Set to the TOC header</param>
private void CreateToc(Stream tocFileStream, ref TocHeader header)
{
    BinarySerializer tocWriter = new BinarySerializer(tocFileStream);

    header.Magic = TocMagic;
    header.Version = VersionPacked;
    header.ModificationsCount = 0;
    header.Padding = 0;
    header.Reserved = 0;
    header.Reserved2 = 0;

    bool hasOldContent = tocFileStream.Length > 0;

    if (hasOldContent)
    {
        // Discard the previous content before writing the new header.
        tocFileStream.Seek(0, SeekOrigin.Begin);
        tocFileStream.SetLength(0);
    }

    tocWriter.Write(ref header);
}
/// <summary>
/// Rebuilds the in-memory TOC from the entries stored on the guest TOC file,
/// starting at the current position of the stream.
/// </summary>
/// <param name="tocFileStream">Guest TOC file stream</param>
/// <param name="reader">TOC file reader</param>
/// <returns>True if the operation was successful, false otherwise</returns>
private bool LoadTocEntries(Stream tocFileStream, ref BinarySerializer reader)
{
    _toc = new Dictionary<uint, List<TocMemoryEntry>>();

    TocEntry current = new TocEntry();

    for (int entryIndex = 0; tocFileStream.Position < tocFileStream.Length; entryIndex++)
    {
        if (!reader.TryRead(ref current))
        {
            return false;
        }

        AddTocMemoryEntry(current.Offset, current.CodeSize, current.Cb1DataSize, current.Hash, entryIndex);
    }

    return true;
}
/// <summary>
/// Writes a new guest code entry into the file.
/// </summary>
/// <remarks>
/// The constant buffer data and the compressed code are appended to the data file, the TOC header is
/// rewritten with an incremented modifications count, and a new entry is appended to the TOC file.
/// The entry is also registered on the in-memory TOC.
/// </remarks>
/// <param name="tocFileStream">TOC file stream</param>
/// <param name="dataFileStream">Data file stream</param>
/// <param name="header">TOC header, to be updated with the new count</param>
/// <param name="data">Guest code</param>
/// <param name="cb1Data">Constant buffer 1 data accessed by the guest code</param>
/// <param name="hash">Code and constant buffer data hash</param>
/// <returns>Entry index</returns>
private int WriteNewEntry(
Stream tocFileStream,
Stream dataFileStream,
ref TocHeader header,
ReadOnlySpan<byte> data,
ReadOnlySpan<byte> cb1Data,
uint hash)
{
BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
// New data is always appended; the offset is stored as 32 bits, so the checked cast
// throws instead of silently truncating once the data file position no longer fits in a uint.
dataFileStream.Seek(0, SeekOrigin.End);
uint dataOffset = checked((uint)dataFileStream.Position);
uint codeSize = (uint)data.Length;
uint cb1DataSize = (uint)cb1Data.Length;
// Layout on the data file: raw constant buffer 1 data, followed by the compressed guest code.
dataFileStream.Write(cb1Data);
BinarySerializer.WriteCompressed(dataFileStream, data, DiskCacheCommon.GetCompressionAlgorithm());
// Bump the modifications count on the file header and remember it, so that LoadOrCreateToc
// does not reload the TOC for a change made by this instance.
_tocModificationsCount = ++header.ModificationsCount;
tocFileStream.Seek(0, SeekOrigin.Begin);
tocWriter.Write(ref header);
TocEntry entry = new TocEntry()
{
Offset = dataOffset,
CodeSize = codeSize,
Cb1DataSize = cb1DataSize,
Hash = hash
};
// The index of the new entry is its position among the fixed-size entries that follow the header.
tocFileStream.Seek(0, SeekOrigin.End);
int index = (int)((tocFileStream.Position - Unsafe.SizeOf<TocHeader>()) / Unsafe.SizeOf<TocEntry>());
tocWriter.Write(ref entry);
AddTocMemoryEntry(dataOffset, codeSize, cb1DataSize, hash, index);
return index;
}
/// <summary>
/// Registers an entry on the in-memory TOC, which is keyed by hash and avoids re-reading the TOC file.
/// </summary>
/// <param name="dataOffset">Offset of the code and constant buffer data in the data file</param>
/// <param name="codeSize">Code size</param>
/// <param name="cb1DataSize">Constant buffer 1 data size</param>
/// <param name="hash">Code and constant buffer data hash</param>
/// <param name="index">Index of the data on the cache</param>
private void AddTocMemoryEntry(uint dataOffset, uint codeSize, uint cb1DataSize, uint hash, int index)
{
    var memoryEntry = new TocMemoryEntry(dataOffset, codeSize, cb1DataSize, index);

    if (_toc.TryGetValue(hash, out var bucket))
    {
        bucket.Add(memoryEntry);
    }
    else
    {
        // First entry with this hash: create its bucket.
        _toc.Add(hash, new List<TocMemoryEntry> { memoryEntry });
    }
}
/// <summary>
/// Computes a combined hash for a pair of data blocks.
/// </summary>
/// <param name="data">Data 1</param>
/// <param name="data2">Data 2</param>
/// <returns>Hash of both data</returns>
private static uint CalcHash(ReadOnlySpan<byte> data, ReadOnlySpan<byte> data2)
{
    uint secondHash = CalcHash(data2);
    uint firstHash = CalcHash(data);

    // The multiplication binds tighter than the XOR.
    return (secondHash * 23) ^ firstHash;
}
/// <summary>
/// Computes the 32-bit hash of a data block, taken from the low part of its XXHash128 value.
/// </summary>
/// <param name="data">Data to be hashed</param>
/// <returns>Hash of the data</returns>
private static uint CalcHash(ReadOnlySpan<byte> data)
    => (uint)XXHash128.ComputeHash(data).Low;
}
}

View file

@ -0,0 +1,839 @@
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.IO;
using System.Numerics;
using System.Runtime.CompilerServices;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// On-disk shader cache storage for host code.
/// </summary>
class DiskCacheHostStorage
{
// Magic values that tag the on-disk structures. Each one packs four ASCII characters into a uint,
// with the first character in the lowest byte.
// TocsMagic is written to the shared TOC header, TochMagic to the host TOC header.
private const uint TocsMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'S' << 24);
private const uint TochMagic = (byte)'T' | ((byte)'O' << 8) | ((byte)'C' << 16) | ((byte)'H' << 24);
// ShdiMagic tags the serialized DataShaderInfo, BufdMagic the buffer descriptors
// and TexdMagic the texture and image descriptors.
private const uint ShdiMagic = (byte)'S' | ((byte)'H' << 8) | ((byte)'D' << 16) | ((byte)'I' << 24);
private const uint BufdMagic = (byte)'B' | ((byte)'U' << 8) | ((byte)'F' << 16) | ((byte)'D' << 24);
private const uint TexdMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'D' << 24);
// File format version. The packed form keeps the major version on the upper 16 bits
// and the minor version on the lower 16 bits.
private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
// Stored on the shared TOC header. When the stored value differs, LoadShaders does not use
// the host cache and queues the guest programs instead.
private const uint CodeGenVersion = 4735;
// Names of the shared TOC and data files, relative to the base path.
private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data";
// Base directory of the shader cache files.
private readonly string _basePath;
/// <summary>
/// Indicates if a (non-empty) base path was supplied for the cache.
/// </summary>
public bool CacheEnabled => !string.IsNullOrEmpty(_basePath);
/// <summary>
/// TOC (Table of contents) file header.
/// </summary>
/// <remarks>
/// The size of this structure is used to compute entry offsets on the TOC files,
/// so changing its fields changes the file format.
/// </remarks>
private struct TocHeader
{
/// <summary>
/// Magic value, for validation and identification.
/// </summary>
public uint Magic;
/// <summary>
/// File format version.
/// </summary>
public uint FormatVersion;
/// <summary>
/// Generated shader code version.
/// </summary>
public uint CodeGenVersion;
/// <summary>
/// Header padding.
/// </summary>
public uint Padding;
/// <summary>
/// Timestamp of when the file was first created, in seconds since the Unix epoch (UTC).
/// </summary>
public ulong Timestamp;
/// <summary>
/// Reserved space, to be used in the future. Write as zero.
/// </summary>
public ulong Reserved;
}
/// <summary>
/// Offset and size pair. One of these is stored on the host TOC file for each program.
/// </summary>
private struct OffsetAndSize
{
/// <summary>
/// Offset of the compressed host code on the host data file.
/// </summary>
public ulong Offset;
/// <summary>
/// Size of uncompressed data.
/// </summary>
public uint UncompressedSize;
/// <summary>
/// Size of compressed data. The per-stage shader information is stored right after it.
/// </summary>
public uint CompressedSize;
}
/// <summary>
/// Per-stage data entry. One is written to the shared data file for each shader of a program.
/// </summary>
private struct DataEntryPerStage
{
/// <summary>
/// Index of the guest code on the guest code cache TOC file.
/// </summary>
public int GuestCodeIndex;
}
/// <summary>
/// Per-program data entry. It is the first structure written for each program on the shared data file.
/// </summary>
private struct DataEntry
{
/// <summary>
/// Bit mask where each bit set is a used shader stage. Should be zero for compute shaders.
/// Only the lower 6 bits may be set; the loader rejects any other bit as file corruption.
/// </summary>
public uint StagesBitMask;
}
/// <summary>
/// Per-stage shader information, returned by the translator.
/// </summary>
/// <remarks>
/// This is the serialized counterpart of <see cref="ShaderProgramInfo"/>. The counts tell how many
/// descriptors of each kind follow this structure on the host data file.
/// </remarks>
private struct DataShaderInfo
{
/// <summary>
/// Total constant buffers used.
/// </summary>
public ushort CBuffersCount;
/// <summary>
/// Total storage buffers used.
/// </summary>
public ushort SBuffersCount;
/// <summary>
/// Total textures used.
/// </summary>
public ushort TexturesCount;
/// <summary>
/// Total images used.
/// </summary>
public ushort ImagesCount;
/// <summary>
/// Shader stage.
/// </summary>
public ShaderStage Stage;
/// <summary>
/// Indicates if the shader accesses the Instance ID built-in variable.
/// </summary>
public bool UsesInstanceId;
/// <summary>
/// Indicates if the shader modifies the Layer built-in variable.
/// </summary>
public bool UsesRtLayer;
/// <summary>
/// Bit mask with the clip distances written on the vertex stage.
/// </summary>
public byte ClipDistancesWritten;
/// <summary>
/// Bit mask of the render target components written by the fragment stage.
/// </summary>
public int FragmentOutputMap;
/// <summary>
/// Indicates if the vertex shader accesses draw parameters.
/// </summary>
public bool UsesDrawParameters;
}
private readonly DiskCacheGuestStorage _guestStorage;

/// <summary>
/// Creates a disk cache host storage, along with the guest storage that shares its base path.
/// </summary>
/// <remarks>
/// The base directory is created when the cache is enabled (the path is not null or empty).
/// </remarks>
/// <param name="basePath">Base path of the shader cache</param>
public DiskCacheHostStorage(string basePath)
{
    _basePath = basePath;
    _guestStorage = new DiskCacheGuestStorage(basePath);

    if (!CacheEnabled)
    {
        return;
    }

    Directory.CreateDirectory(basePath);
}
/// <summary>
/// Gets the number of programs listed on the shared TOC file.
/// </summary>
/// <returns>Programs count, or zero if the shared TOC file does not exist</returns>
public int GetProgramCount()
{
    string tocFilePath = Path.Combine(_basePath, SharedTocFileName);

    if (File.Exists(tocFilePath))
    {
        // After the header, the shared TOC holds one 64-bit data offset per program.
        long entriesSize = new FileInfo(tocFilePath).Length - Unsafe.SizeOf<TocHeader>();

        return Math.Max((int)(entriesSize / sizeof(ulong)), 0);
    }

    return 0;
}
/// <summary>
/// Gets the name of the host program cache file, without extension.
/// </summary>
/// <remarks>
/// The name is built from the lower-cased API name and the filtered, lower-cased vendor name.
/// </remarks>
/// <param name="context">GPU context</param>
/// <returns>Name of the file, without extension</returns>
private static string GetHostFileName(GpuContext context)
{
    string apiName = context.Capabilities.Api.ToString().ToLowerInvariant();

    string loweredVendor = context.Capabilities.VendorName.ToLowerInvariant();
    string vendorName = RemoveInvalidCharacters(loweredVendor);

    return $"{apiName}_{vendorName}";
}
/// <summary>
/// Builds a file-name-safe string: everything from the first space onwards is dropped,
/// then all invalid file name characters are removed from what is left.
/// </summary>
/// <param name="fileName">File name</param>
/// <returns>Filtered file name</returns>
private static string RemoveInvalidCharacters(string fileName)
{
    int firstSpace = fileName.IndexOf(' ');
    string head = firstSpace < 0 ? fileName : fileName.Substring(0, firstSpace);

    // Splitting on the invalid characters and joining the pieces back removes them.
    string[] validParts = head.Split(Path.GetInvalidFileNameChars(), StringSplitOptions.RemoveEmptyEntries);

    return string.Concat(validParts);
}
/// <summary>
/// Gets the name of the host TOC file, which is the host file name with the ".toc" extension.
/// </summary>
/// <param name="context">GPU context</param>
/// <returns>File name</returns>
private static string GetHostTocFileName(GpuContext context)
    => $"{GetHostFileName(context)}.toc";
/// <summary>
/// Gets the name of the host data file, which is the host file name with the ".data" extension.
/// </summary>
/// <param name="context">GPU context</param>
/// <returns>File name</returns>
private static string GetHostDataFileName(GpuContext context)
    => $"{GetHostFileName(context)}.data";
/// <summary>
/// Checks if a disk cache exists for the current application.
/// </summary>
/// <remarks>
/// The cache exists only when the shared TOC and data files and the guest TOC and data files are all present.
/// </remarks>
/// <returns>True if a disk cache exists, false otherwise</returns>
public bool CacheExists()
{
    string tocFilePath = Path.Combine(_basePath, SharedTocFileName);
    string dataFilePath = Path.Combine(_basePath, SharedDataFileName);

    return File.Exists(tocFilePath)
        && File.Exists(dataFilePath)
        && _guestStorage.TocFileExists()
        && _guestStorage.DataFileExists();
}
/// <summary>
/// Loads all shaders from the cache.
/// </summary>
/// <remarks>
/// Every program listed on the shared TOC is read together with its guest code. When the host cache is usable,
/// the program is queued with its host code; otherwise it is queued as a guest program.
/// Once the host code of a program cannot be read, all remaining programs are queued as guest programs.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="loader">Parallel disk cache loader</param>
/// <exception cref="DiskCacheLoadException">Thrown when the shared files are corrupted or have an incompatible format version</exception>
public void LoadShaders(GpuContext context, ParallelDiskCacheLoader loader)
{
if (!CacheExists())
{
return;
}
// The host streams are opened lazily by ReadHostCode, and disposed on the finally block below.
Stream hostTocFileStream = null;
Stream hostDataFileStream = null;
try
{
using var tocFileStream = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: false);
using var dataFileStream = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: false);
using var guestTocFileStream = _guestStorage.OpenTocFileStream();
using var guestDataFileStream = _guestStorage.OpenDataFileStream();
BinarySerializer tocReader = new BinarySerializer(tocFileStream);
BinarySerializer dataReader = new BinarySerializer(dataFileStream);
TocHeader header = new TocHeader();
if (!tocReader.TryRead(ref header) || header.Magic != TocsMagic)
{
throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
}
if (header.FormatVersion != FileFormatVersionPacked)
{
throw new DiskCacheLoadException(DiskCacheLoadResult.IncompatibleVersion);
}
// The host code is only usable if it was produced with the current code generation version.
bool loadHostCache = header.CodeGenVersion == CodeGenVersion;
int programIndex = 0;
DataEntry entry = new DataEntry();
// After the header, the shared TOC is a list of offsets into the shared data file, one per program.
while (tocFileStream.Position < tocFileStream.Length && loader.Active)
{
ulong dataOffset = 0;
tocReader.Read(ref dataOffset);
if ((ulong)dataOffset >= (ulong)dataFileStream.Length)
{
throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
}
dataFileStream.Seek((long)dataOffset, SeekOrigin.Begin);
dataReader.BeginCompression();
dataReader.Read(ref entry);
uint stagesBitMask = entry.StagesBitMask;
// Only the lower 6 bits are valid stage bits.
if ((stagesBitMask & ~0x3fu) != 0)
{
throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
}
// Compute programs are stored with an empty mask; they have a single stage at index 0.
bool isCompute = stagesBitMask == 0;
if (isCompute)
{
stagesBitMask = 1;
}
GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[isCompute ? 1 : Constants.ShaderStages + 1];
DataEntryPerStage stageEntry = new DataEntryPerStage();
// One stage entry is stored for each bit set on the mask, from the lowest bit to the highest.
while (stagesBitMask != 0)
{
int stageIndex = BitOperations.TrailingZeroCount(stagesBitMask);
dataReader.Read(ref stageEntry);
guestShaders[stageIndex] = _guestStorage.LoadShader(
guestTocFileStream,
guestDataFileStream,
stageEntry.GuestCodeIndex);
stagesBitMask &= ~(1u << stageIndex);
}
ShaderSpecializationState specState = ShaderSpecializationState.Read(ref dataReader);
dataReader.EndCompression();
if (loadHostCache)
{
(byte[] hostCode, CachedShaderStage[] shaders) = ReadHostCode(
context,
ref hostTocFileStream,
ref hostDataFileStream,
guestShaders,
programIndex,
header.Timestamp);
if (hostCode != null)
{
// NOTE(review): index 5 appears to be the fragment stage slot — confirm against the stage layout used elsewhere.
bool hasFragmentShader = shaders.Length > 5 && shaders[5] != null;
int fragmentOutputMap = hasFragmentShader ? shaders[5].Info.FragmentOutputMap : -1;
ShaderInfo shaderInfo = specState.PipelineState.HasValue
? new ShaderInfo(fragmentOutputMap, specState.PipelineState.Value, fromCache: true)
: new ShaderInfo(fragmentOutputMap, fromCache: true);
IProgram hostProgram;
// For Vulkan, the cached host code is unpacked into per-stage sources; otherwise it is loaded as a program binary.
if (context.Capabilities.Api == TargetApi.Vulkan)
{
ShaderSource[] shaderSources = ShaderBinarySerializer.Unpack(shaders, hostCode);
hostProgram = context.Renderer.CreateProgram(shaderSources, shaderInfo);
}
else
{
hostProgram = context.Renderer.LoadProgramBinary(hostCode, hasFragmentShader, shaderInfo);
}
CachedShaderProgram program = new CachedShaderProgram(hostProgram, specState, shaders);
loader.QueueHostProgram(program, hostCode, programIndex, isCompute);
}
else
{
// No host code for this program: stop using the host cache for this and all following programs.
loadHostCache = false;
}
}
if (!loadHostCache)
{
loader.QueueGuestProgram(guestShaders, specState, programIndex, isCompute);
}
loader.CheckCompilation();
programIndex++;
}
}
finally
{
_guestStorage.ClearMemoryCache();
hostTocFileStream?.Dispose();
hostDataFileStream?.Dispose();
}
}
/// <summary>
/// Reads the host code for a given shader, if existent.
/// </summary>
/// <remarks>
/// The host files are opened on the first call and handed back through the stream parameters,
/// so the caller owns them and is responsible for disposing them.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="tocFileStream">Host TOC file stream, initialized if needed</param>
/// <param name="dataFileStream">Host data file stream, initialized if needed</param>
/// <param name="guestShaders">Guest shader code for each active stage</param>
/// <param name="programIndex">Index of the program on the cache</param>
/// <param name="expectedTimestamp">Timestamp of the shared cache file. The host file must not be older than it</param>
/// <returns>Host binary code and the cached stages, or a pair of nulls if not found</returns>
/// <exception cref="DiskCacheLoadException">Thrown when the TOC entry points outside of the host data file</exception>
private (byte[], CachedShaderStage[]) ReadHostCode(
    GpuContext context,
    ref Stream tocFileStream,
    ref Stream dataFileStream,
    GuestCodeAndCbData?[] guestShaders,
    int programIndex,
    ulong expectedTimestamp)
{
    if (tocFileStream == null && dataFileStream == null)
    {
        string tocFilePath = Path.Combine(_basePath, GetHostTocFileName(context));
        string dataFilePath = Path.Combine(_basePath, GetHostDataFileName(context));

        if (!File.Exists(tocFilePath) || !File.Exists(dataFilePath))
        {
            return (null, null);
        }

        tocFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: false);
        dataFileStream = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: false);

        BinarySerializer tempTocReader = new BinarySerializer(tocFileStream);

        TocHeader header = new TocHeader();
        tempTocReader.Read(ref header);

        // A host file older than the shared file does not belong to the current shared cache.
        if (header.Timestamp < expectedTimestamp)
        {
            return (null, null);
        }
    }

    int offset = Unsafe.SizeOf<TocHeader>() + programIndex * Unsafe.SizeOf<OffsetAndSize>();

    // The host TOC has no entry for this program.
    if (offset + Unsafe.SizeOf<OffsetAndSize>() > tocFileStream.Length)
    {
        return (null, null);
    }

    tocFileStream.Seek(offset, SeekOrigin.Begin);

    BinarySerializer tocReader = new BinarySerializer(tocFileStream);

    OffsetAndSize offsetAndSize = new OffsetAndSize();
    tocReader.Read(ref offsetAndSize);

    // Both the compressed host code and the shader information stored after it must lie within the data file.
    // (The TOC offset itself says nothing about the data file, so it is not compared against its length.)
    ulong dataLength = (ulong)dataFileStream.Length;
    ulong infoOffset = offsetAndSize.Offset + offsetAndSize.CompressedSize;

    if (offsetAndSize.Offset >= dataLength || infoOffset > dataLength)
    {
        throw new DiskCacheLoadException(DiskCacheLoadResult.FileCorruptedGeneric);
    }

    dataFileStream.Seek((long)offsetAndSize.Offset, SeekOrigin.Begin);

    byte[] hostCode = new byte[offsetAndSize.UncompressedSize];

    BinarySerializer.ReadCompressed(dataFileStream, hostCode);

    CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
    BinarySerializer dataReader = new BinarySerializer(dataFileStream);

    // Seek explicitly to the end of the compressed host code, where the per-stage information starts.
    dataFileStream.Seek((long)infoOffset, SeekOrigin.Begin);

    dataReader.BeginCompression();

    for (int index = 0; index < guestShaders.Length; index++)
    {
        if (!guestShaders[index].HasValue)
        {
            continue;
        }

        GuestCodeAndCbData guestShader = guestShaders[index].Value;

        // On multi-stage programs, the stage at index 0 has no stored information.
        ShaderProgramInfo info = index != 0 || guestShaders.Length == 1 ? ReadShaderProgramInfo(ref dataReader) : null;

        shaders[index] = new CachedShaderStage(info, guestShader.Code, guestShader.Cb1Data);
    }

    dataReader.EndCompression();

    return (hostCode, shaders);
}
/// <summary>
/// Opens the shared and host cache files for writing and bundles the streams, for faster batch writing.
/// </summary>
/// <param name="context">The GPU context, used to determine the host disk cache</param>
/// <returns>A collection of disk cache output streams</returns>
public DiskCacheOutputStreams GetOutputStreams(GpuContext context)
{
    var sharedToc = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
    var sharedData = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

    var hostToc = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
    var hostData = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

    return new DiskCacheOutputStreams(sharedToc, sharedData, hostToc, hostData);
}
/// <summary>
/// Adds a shader to the cache.
/// </summary>
/// <remarks>
/// The program entry is appended to the shared files and its guest code is added to the guest cache.
/// The host code, when supplied, is appended to the host files.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="program">Cached program</param>
/// <param name="hostCode">Optional host binary code</param>
/// <param name="streams">Output streams to use. When null, the files are opened and closed by this call</param>
public void AddShader(GpuContext context, CachedShaderProgram program, ReadOnlySpan<byte> hostCode, DiskCacheOutputStreams streams = null)
{
    // Compute shaders are stored with an empty stage mask.
    uint stagesBitMask = 0;

    for (int index = 0; index < program.Shaders.Length; index++)
    {
        var shader = program.Shaders[index];

        if (shader == null || (shader.Info != null && shader.Info.Stage == ShaderStage.Compute))
        {
            continue;
        }

        stagesBitMask |= 1u << index;
    }

    ulong timestamp;
    Stream tocFileStream = null;
    Stream dataFileStream = null;

    try
    {
        tocFileStream = streams != null ? streams.TocFileStream : DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true);
        dataFileStream = streams != null ? streams.DataFileStream : DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true);

        timestamp = (ulong)DateTime.UtcNow.Subtract(DateTime.UnixEpoch).TotalSeconds;

        if (tocFileStream.Length == 0)
        {
            TocHeader header = new TocHeader();
            CreateToc(tocFileStream, ref header, TocsMagic, CodeGenVersion, timestamp);
        }

        tocFileStream.Seek(0, SeekOrigin.End);
        dataFileStream.Seek(0, SeekOrigin.End);

        BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
        BinarySerializer dataWriter = new BinarySerializer(dataFileStream);

        // The shared TOC stores the offset of each program entry on the shared data file.
        ulong dataOffset = (ulong)dataFileStream.Position;
        tocWriter.Write(ref dataOffset);

        DataEntry entry = new DataEntry();

        entry.StagesBitMask = stagesBitMask;

        dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());
        dataWriter.Write(ref entry);

        DataEntryPerStage stageEntry = new DataEntryPerStage();

        for (int index = 0; index < program.Shaders.Length; index++)
        {
            var shader = program.Shaders[index];

            if (shader == null)
            {
                continue;
            }

            stageEntry.GuestCodeIndex = _guestStorage.AddShader(shader.Code, shader.Cb1Data);

            dataWriter.Write(ref stageEntry);
        }

        program.SpecializationState.Write(ref dataWriter);
        dataWriter.EndCompression();
    }
    finally
    {
        // Streams opened by this call are closed even if writing fails. Streams supplied by the caller are left open.
        if (streams == null)
        {
            tocFileStream?.Dispose();
            dataFileStream?.Dispose();
        }
    }

    if (hostCode.IsEmpty)
    {
        return;
    }

    WriteHostCode(context, hostCode, program.Shaders, streams, timestamp);
}
/// <summary>
/// Clears all content from the guest cache files, by delegating to the guest storage.
/// </summary>
public void ClearGuestCache() => _guestStorage.ClearCache();
/// <summary>
/// Truncates the shared cache files (TOC and data) to zero length.
/// </summary>
public void ClearSharedCache()
{
    using (var toc = DiskCacheCommon.OpenFile(_basePath, SharedTocFileName, writable: true))
    using (var data = DiskCacheCommon.OpenFile(_basePath, SharedDataFileName, writable: true))
    {
        toc.SetLength(0);
        data.SetLength(0);
    }
}
/// <summary>
/// Truncates the host cache files (TOC and data) of the current host to zero length.
/// </summary>
/// <param name="context">GPU context, used to determine the host file names</param>
public void ClearHostCache(GpuContext context)
{
    using (var toc = DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true))
    using (var data = DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true))
    {
        toc.SetLength(0);
        data.SetLength(0);
    }
}
/// <summary>
/// Writes the host binary code on the host cache.
/// </summary>
/// <remarks>
/// The compressed host code is appended to the host data file, followed by the information of each stage.
/// The location and sizes of the host code are appended to the host TOC file.
/// </remarks>
/// <param name="context">GPU context</param>
/// <param name="hostCode">Host binary code</param>
/// <param name="shaders">Shader stages to be added to the host cache</param>
/// <param name="streams">Output streams to use. When null, the files are opened and closed by this call</param>
/// <param name="timestamp">File creation timestamp</param>
private void WriteHostCode(
    GpuContext context,
    ReadOnlySpan<byte> hostCode,
    CachedShaderStage[] shaders,
    DiskCacheOutputStreams streams,
    ulong timestamp)
{
    Stream tocFileStream = null;
    Stream dataFileStream = null;

    try
    {
        tocFileStream = streams != null ? streams.HostTocFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostTocFileName(context), writable: true);
        dataFileStream = streams != null ? streams.HostDataFileStream : DiskCacheCommon.OpenFile(_basePath, GetHostDataFileName(context), writable: true);

        if (tocFileStream.Length == 0)
        {
            TocHeader header = new TocHeader();
            CreateToc(tocFileStream, ref header, TochMagic, 0, timestamp);
        }

        tocFileStream.Seek(0, SeekOrigin.End);
        dataFileStream.Seek(0, SeekOrigin.End);

        BinarySerializer tocWriter = new BinarySerializer(tocFileStream);
        BinarySerializer dataWriter = new BinarySerializer(dataFileStream);

        OffsetAndSize offsetAndSize = new OffsetAndSize();
        offsetAndSize.Offset = (ulong)dataFileStream.Position;
        offsetAndSize.UncompressedSize = (uint)hostCode.Length;

        long dataStartPosition = dataFileStream.Position;

        BinarySerializer.WriteCompressed(dataFileStream, hostCode, DiskCacheCommon.GetCompressionAlgorithm());

        // The compressed size is only known after writing; readers use it to locate the stage information.
        offsetAndSize.CompressedSize = (uint)(dataFileStream.Position - dataStartPosition);

        tocWriter.Write(ref offsetAndSize);

        dataWriter.BeginCompression(DiskCacheCommon.GetCompressionAlgorithm());

        for (int index = 0; index < shaders.Length; index++)
        {
            if (shaders[index] != null)
            {
                WriteShaderProgramInfo(ref dataWriter, shaders[index].Info);
            }
        }

        dataWriter.EndCompression();
    }
    finally
    {
        // Streams opened by this call are closed even if writing fails. Streams supplied by the caller are left open.
        if (streams == null)
        {
            tocFileStream?.Dispose();
            dataFileStream?.Dispose();
        }
    }
}
/// <summary>
/// Resets a host or shared TOC file so that it only contains a freshly initialized header.
/// </summary>
/// <param name="tocFileStream">TOC file stream</param>
/// <param name="header">Set to the TOC file header</param>
/// <param name="magic">Magic value to be written</param>
/// <param name="codegenVersion">Shader codegen version stored on the header (callers in this class pass zero for the host file)</param>
/// <param name="timestamp">File creation timestamp</param>
private void CreateToc(Stream tocFileStream, ref TocHeader header, uint magic, uint codegenVersion, ulong timestamp)
{
    BinarySerializer tocWriter = new BinarySerializer(tocFileStream);

    header = new TocHeader
    {
        Magic = magic,
        FormatVersion = FileFormatVersionPacked,
        CodeGenVersion = codegenVersion,
        Padding = 0,
        Timestamp = timestamp,
        Reserved = 0
    };

    bool hasOldContent = tocFileStream.Length > 0;

    if (hasOldContent)
    {
        // Discard the previous content before writing the new header.
        tocFileStream.Seek(0, SeekOrigin.Begin);
        tocFileStream.SetLength(0);
    }

    tocWriter.Write(ref header);
}
/// <summary>
/// Reads the information of one shader stage from the cache: a header with the descriptor counts,
/// followed by the constant buffer, storage buffer, texture and image descriptors, in that order.
/// </summary>
/// <param name="dataReader">Cache data reader</param>
/// <returns>Shader program info</returns>
private static ShaderProgramInfo ReadShaderProgramInfo(ref BinarySerializer dataReader)
{
    DataShaderInfo stored = new DataShaderInfo();

    dataReader.ReadWithMagicAndSize(ref stored, ShdiMagic);

    var constantBuffers = new BufferDescriptor[stored.CBuffersCount];
    var storageBuffers = new BufferDescriptor[stored.SBuffersCount];
    var textureDescriptors = new TextureDescriptor[stored.TexturesCount];
    var imageDescriptors = new TextureDescriptor[stored.ImagesCount];

    foreach (ref BufferDescriptor descriptor in constantBuffers.AsSpan())
    {
        dataReader.ReadWithMagicAndSize(ref descriptor, BufdMagic);
    }

    foreach (ref BufferDescriptor descriptor in storageBuffers.AsSpan())
    {
        dataReader.ReadWithMagicAndSize(ref descriptor, BufdMagic);
    }

    foreach (ref TextureDescriptor descriptor in textureDescriptors.AsSpan())
    {
        dataReader.ReadWithMagicAndSize(ref descriptor, TexdMagic);
    }

    foreach (ref TextureDescriptor descriptor in imageDescriptors.AsSpan())
    {
        dataReader.ReadWithMagicAndSize(ref descriptor, TexdMagic);
    }

    // The identification and the following argument are not stored on the cache; fixed values are used.
    return new ShaderProgramInfo(
        constantBuffers,
        storageBuffers,
        textureDescriptors,
        imageDescriptors,
        ShaderIdentification.None,
        0,
        stored.Stage,
        stored.UsesInstanceId,
        stored.UsesDrawParameters,
        stored.UsesRtLayer,
        stored.ClipDistancesWritten,
        stored.FragmentOutputMap);
}
/// <summary>
/// Writes the information of one shader stage into the cache: a header with the descriptor counts,
/// followed by the constant buffer, storage buffer, texture and image descriptors, in that order.
/// Nothing is written when there is no information.
/// </summary>
/// <param name="dataWriter">Cache data writer</param>
/// <param name="info">Program info</param>
private static void WriteShaderProgramInfo(ref BinarySerializer dataWriter, ShaderProgramInfo info)
{
    if (info != null)
    {
        DataShaderInfo stored = new DataShaderInfo
        {
            CBuffersCount = (ushort)info.CBuffers.Count,
            SBuffersCount = (ushort)info.SBuffers.Count,
            TexturesCount = (ushort)info.Textures.Count,
            ImagesCount = (ushort)info.Images.Count,
            Stage = info.Stage,
            UsesInstanceId = info.UsesInstanceId,
            UsesDrawParameters = info.UsesDrawParameters,
            UsesRtLayer = info.UsesRtLayer,
            ClipDistancesWritten = info.ClipDistancesWritten,
            FragmentOutputMap = info.FragmentOutputMap
        };

        dataWriter.WriteWithMagicAndSize(ref stored, ShdiMagic);

        // Each descriptor is copied to a local, since the writer takes it by reference.
        for (int i = 0; i < info.CBuffers.Count; i++)
        {
            var constantBuffer = info.CBuffers[i];
            dataWriter.WriteWithMagicAndSize(ref constantBuffer, BufdMagic);
        }

        for (int i = 0; i < info.SBuffers.Count; i++)
        {
            var storageBuffer = info.SBuffers[i];
            dataWriter.WriteWithMagicAndSize(ref storageBuffer, BufdMagic);
        }

        for (int i = 0; i < info.Textures.Count; i++)
        {
            var texture = info.Textures[i];
            dataWriter.WriteWithMagicAndSize(ref texture, TexdMagic);
        }

        for (int i = 0; i < info.Images.Count; i++)
        {
            var image = info.Images[i];
            dataWriter.WriteWithMagicAndSize(ref image, TexdMagic);
        }
    }
}
}
}

View file

@ -0,0 +1,48 @@
using System;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Exception raised when loading the disk shader cache fails.
/// </summary>
class DiskCacheLoadException : Exception
{
    /// <summary>
    /// Result of the cache load operation. Keeps its default value when the exception
    /// is created without a result code.
    /// </summary>
    public DiskCacheLoadResult Result { get; }

    /// <summary>
    /// Creates a new instance of the disk cache load exception, without message or result code.
    /// </summary>
    public DiskCacheLoadException() : base()
    {
    }

    /// <summary>
    /// Creates a new instance of the disk cache load exception, with a custom message.
    /// </summary>
    /// <param name="message">Exception message</param>
    public DiskCacheLoadException(string message) : base(message)
    {
    }

    /// <summary>
    /// Creates a new instance of the disk cache load exception, with a custom message and the exception that caused it.
    /// </summary>
    /// <param name="message">Exception message</param>
    /// <param name="inner">Inner exception</param>
    public DiskCacheLoadException(string message, Exception inner) : base(message, inner)
    {
    }

    /// <summary>
    /// Creates a new instance of the disk cache load exception from a result code.
    /// The exception message is the message associated with the code.
    /// </summary>
    /// <param name="result">Result code</param>
    public DiskCacheLoadException(DiskCacheLoadResult result) : base(result.GetMessage())
        => Result = result;
}
}

View file

@ -0,0 +1,72 @@
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Result of a shader cache load operation.
/// </summary>
enum DiskCacheLoadResult
{
    /// <summary>
    /// No error.
    /// </summary>
    Success,

    /// <summary>
    /// File can't be accessed.
    /// </summary>
    NoAccess,

    /// <summary>
    /// The constant buffer 1 data length is too low for the translation of the guest shader.
    /// </summary>
    InvalidCb1DataLength,

    /// <summary>
    /// The cache is missing the descriptor of a texture used by the shader.
    /// </summary>
    MissingTextureDescriptor,

    /// <summary>
    /// File is corrupted.
    /// </summary>
    FileCorruptedGeneric,

    /// <summary>
    /// File is corrupted, detected by magic value check.
    /// </summary>
    FileCorruptedInvalidMagic,

    /// <summary>
    /// File is corrupted, detected by length check.
    /// </summary>
    FileCorruptedInvalidLength,

    /// <summary>
    /// File might be valid, but is incompatible with the current emulator version.
    /// </summary>
    IncompatibleVersion
}

/// <summary>
/// Extension methods for <see cref="DiskCacheLoadResult"/>.
/// </summary>
static class DiskCacheLoadResultExtensions
{
    /// <summary>
    /// Maps a result code to its human readable message.
    /// </summary>
    /// <param name="result">Result code</param>
    /// <returns>Error message, or a generic one for codes without a dedicated message</returns>
    public static string GetMessage(this DiskCacheLoadResult result)
    {
        switch (result)
        {
            case DiskCacheLoadResult.Success:
                return "No error.";
            case DiskCacheLoadResult.NoAccess:
                return "Could not access the cache file.";
            case DiskCacheLoadResult.InvalidCb1DataLength:
                return "Constant buffer 1 data length is too low.";
            case DiskCacheLoadResult.MissingTextureDescriptor:
                return "Texture descriptor missing from the cache file.";
            case DiskCacheLoadResult.FileCorruptedGeneric:
                return "The cache file is corrupted.";
            case DiskCacheLoadResult.FileCorruptedInvalidMagic:
                return "Magic check failed, the cache file is corrupted.";
            case DiskCacheLoadResult.FileCorruptedInvalidLength:
                return "Length check failed, the cache file is corrupted.";
            case DiskCacheLoadResult.IncompatibleVersion:
                return "The version of the disk cache is not compatible with this version of the emulator.";
            default:
                return "Unknown error.";
        }
    }
}
}

View file

@ -0,0 +1,57 @@
using System;
using System.IO;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Output streams for the disk shader cache.
/// </summary>
class DiskCacheOutputStreams : IDisposable
{
    /// <summary>
    /// Shared table of contents (TOC) file stream.
    /// </summary>
    public readonly FileStream TocFileStream;

    /// <summary>
    /// Shared data file stream.
    /// </summary>
    public readonly FileStream DataFileStream;

    /// <summary>
    /// Host table of contents (TOC) file stream.
    /// </summary>
    public readonly FileStream HostTocFileStream;

    /// <summary>
    /// Host data file stream.
    /// </summary>
    public readonly FileStream HostDataFileStream;

    /// <summary>
    /// Creates a new instance of a disk cache output stream container.
    /// </summary>
    /// <param name="tocFileStream">Stream for the shared table of contents file</param>
    /// <param name="dataFileStream">Stream for the shared data file</param>
    /// <param name="hostTocFileStream">Stream for the host table of contents file</param>
    /// <param name="hostDataFileStream">Stream for the host data file</param>
    public DiskCacheOutputStreams(FileStream tocFileStream, FileStream dataFileStream, FileStream hostTocFileStream, FileStream hostDataFileStream)
    {
        TocFileStream = tocFileStream;
        DataFileStream = dataFileStream;
        HostTocFileStream = hostTocFileStream;
        HostDataFileStream = hostDataFileStream;
    }

    /// <summary>
    /// Disposes the output file streams.
    /// </summary>
    /// <remarks>
    /// Every stream is disposed even when disposing an earlier one throws (for example because
    /// the final flush fails), so a single failure cannot leak the remaining file handles.
    /// The exception still propagates to the caller.
    /// </remarks>
    public void Dispose()
    {
        try
        {
            TocFileStream.Dispose();
        }
        finally
        {
            try
            {
                DataFileStream.Dispose();
            }
            finally
            {
                try
                {
                    HostTocFileStream.Dispose();
                }
                finally
                {
                    HostDataFileStream.Dispose();
                }
            }
        }
    }
}
}

View file

@ -0,0 +1,29 @@
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
    /// <summary>
    /// Pairs the guest shader binary with the constant buffer data the shader accessed.
    /// </summary>
    readonly struct GuestCodeAndCbData
    {
        /// <summary>
        /// Maxwell binary shader code.
        /// </summary>
        public byte[] Code { get; }

        /// <summary>
        /// Constant buffer 1 data accessed by the shader.
        /// </summary>
        public byte[] Cb1Data { get; }

        /// <summary>
        /// Wraps the given arrays; they are stored by reference, not copied.
        /// </summary>
        /// <param name="code">Maxwell binary shader code</param>
        /// <param name="cb1Data">Constant buffer 1 data accessed by the shader</param>
        public GuestCodeAndCbData(byte[] code, byte[] cb1Data)
        {
            Cb1Data = cb1Data;
            Code = code;
        }
    }
}

View file

@ -0,0 +1,725 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using static Ryujinx.Graphics.Gpu.Shader.ShaderCache;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
class ParallelDiskCacheLoader
{
// Number of translation worker threads started by LoadShaders; also used as the
// bounded capacity of the asynchronous translation queue.
private const int ThreadCount = 8;
private readonly GpuContext _context;
// In-memory caches that receive successfully compiled graphics and compute programs.
private readonly ShaderCacheHashTable _graphicsCache;
private readonly ComputeShaderCacheHashTable _computeCache;
private readonly DiskCacheHostStorage _hostStorage;
private readonly CancellationToken _cancellationToken;
// Invoked with (state, compiled count, total count) to report loading progress.
private readonly Action<ShaderCacheState, int, int> _stateChangeCallback;
/// <summary>
/// Indicates if the cache should be loaded.
/// </summary>
/// <remarks>
/// False once cancellation has been requested on the token given to the constructor.
/// </remarks>
public bool Active => !_cancellationToken.IsCancellationRequested;
// Set when the cache files must be rebuilt after loading: a load error other than
// NoAccess occurred, or a program had to be compiled from guest code.
private bool _needsHostRegen;
/// <summary>
/// Number of shaders that failed to compile from the cache.
/// </summary>
public int ErrorCount { get; private set; }
/// <summary>
/// A program waiting for its host compilation result to be validated.
/// </summary>
private readonly struct ProgramEntry
{
    /// <summary>
    /// Cached shader program.
    /// </summary>
    public readonly CachedShaderProgram CachedProgram;

    /// <summary>
    /// Optional binary code. When not null it is used instead of the binary fetched from the backend.
    /// </summary>
    public readonly byte[] BinaryCode;

    /// <summary>
    /// Program index.
    /// </summary>
    public readonly int ProgramIndex;

    /// <summary>
    /// True when the program is a compute shader.
    /// </summary>
    public readonly bool IsCompute;

    /// <summary>
    /// True when the program was created from a host binary.
    /// </summary>
    public readonly bool IsBinary;

    /// <summary>
    /// Creates a new program validation entry.
    /// </summary>
    /// <param name="cachedProgram">Cached shader program</param>
    /// <param name="binaryCode">Optional binary code. When not null it is used instead of the backend host binary</param>
    /// <param name="programIndex">Program index</param>
    /// <param name="isCompute">True when the program is a compute shader</param>
    /// <param name="isBinary">True when the program is a host binary shader</param>
    public ProgramEntry(
        CachedShaderProgram cachedProgram,
        byte[] binaryCode,
        int programIndex,
        bool isCompute,
        bool isBinary)
    {
        ProgramIndex = programIndex;
        IsCompute = isCompute;
        IsBinary = isBinary;
        CachedProgram = cachedProgram;
        BinaryCode = binaryCode;
    }
}
/// <summary>
/// A translated program waiting to be compiled on the host.
/// </summary>
private readonly struct ProgramCompilation
{
    /// <summary>
    /// Translated shader stages.
    /// </summary>
    public readonly ShaderProgram[] TranslatedStages;

    /// <summary>
    /// Cached shaders.
    /// </summary>
    public readonly CachedShaderStage[] Shaders;

    /// <summary>
    /// Specialization state.
    /// </summary>
    public readonly ShaderSpecializationState SpecializationState;

    /// <summary>
    /// Program index.
    /// </summary>
    public readonly int ProgramIndex;

    /// <summary>
    /// True when the program is a compute shader.
    /// </summary>
    public readonly bool IsCompute;

    /// <summary>
    /// Creates a new translated shader compilation entry.
    /// </summary>
    /// <param name="translatedStages">Translated shader stages</param>
    /// <param name="shaders">Cached shaders</param>
    /// <param name="specState">Specialization state</param>
    /// <param name="programIndex">Program index</param>
    /// <param name="isCompute">True when the program is a compute shader</param>
    public ProgramCompilation(
        ShaderProgram[] translatedStages,
        CachedShaderStage[] shaders,
        ShaderSpecializationState specState,
        int programIndex,
        bool isCompute)
    {
        ProgramIndex = programIndex;
        IsCompute = isCompute;
        SpecializationState = specState;
        Shaders = shaders;
        TranslatedStages = translatedStages;
    }
}
/// <summary>
/// A guest program waiting to be translated by one of the worker threads.
/// </summary>
private readonly struct AsyncProgramTranslation
{
    /// <summary>
    /// Guest code for each active stage.
    /// </summary>
    public readonly GuestCodeAndCbData?[] GuestShaders;

    /// <summary>
    /// Specialization state.
    /// </summary>
    public readonly ShaderSpecializationState SpecializationState;

    /// <summary>
    /// Program index.
    /// </summary>
    public readonly int ProgramIndex;

    /// <summary>
    /// True when the program is a compute shader.
    /// </summary>
    public readonly bool IsCompute;

    /// <summary>
    /// Creates a new program translation entry.
    /// </summary>
    /// <param name="guestShaders">Guest code for each active stage</param>
    /// <param name="specState">Specialization state</param>
    /// <param name="programIndex">Program index</param>
    /// <param name="isCompute">True when the program is a compute shader</param>
    public AsyncProgramTranslation(
        GuestCodeAndCbData?[] guestShaders,
        ShaderSpecializationState specState,
        int programIndex,
        bool isCompute)
    {
        ProgramIndex = programIndex;
        IsCompute = isCompute;
        SpecializationState = specState;
        GuestShaders = guestShaders;
    }
}
// Programs submitted to the host backend whose link status has not been processed yet.
private readonly Queue<ProgramEntry> _validationQueue;
// Translated programs produced by RecompileFromGuestCode, waiting for host program creation.
private readonly ConcurrentQueue<ProgramCompilation> _compilationQueue;
// Guest programs waiting for translation; consumed by the worker threads (see ProcessAsyncQueue).
private readonly BlockingCollection<AsyncProgramTranslation> _asyncTranslationQueue;
// Successfully compiled programs keyed by program index, used to rebuild the cache files.
private readonly SortedList<int, (CachedShaderProgram, byte[])> _programList;
// Upper bound on in-flight host compilations (see EnqueueForValidation).
private int _backendParallelCompileThreads;
// Progress counters reported through the state change callback.
private int _compiledCount;
private int _totalCount;
/// <summary>
/// Creates a new parallel disk cache loader.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="graphicsCache">Graphics shader cache</param>
/// <param name="computeCache">Compute shader cache</param>
/// <param name="hostStorage">Disk cache host storage</param>
/// <param name="cancellationToken">Cancellation token</param>
/// <param name="stateChangeCallback">Function called on every state change, reporting the state, compiled and total shader counts</param>
public ParallelDiskCacheLoader(
    GpuContext context,
    ShaderCacheHashTable graphicsCache,
    ComputeShaderCacheHashTable computeCache,
    DiskCacheHostStorage hostStorage,
    CancellationToken cancellationToken,
    Action<ShaderCacheState, int, int> stateChangeCallback)
{
    // Work queues and result list.
    _validationQueue = new();
    _compilationQueue = new();
    _asyncTranslationQueue = new(ThreadCount);
    _programList = new();

    // Must be kept in sync with the backend code.
    _backendParallelCompileThreads = Math.Min(Environment.ProcessorCount, 8);

    // Collaborators supplied by the caller.
    _context = context;
    _graphicsCache = graphicsCache;
    _computeCache = computeCache;
    _hostStorage = hostStorage;
    _cancellationToken = cancellationToken;
    _stateChangeCallback = stateChangeCallback;
}
/// <summary>
/// Loads all shaders from the cache.
/// </summary>
/// <remarks>
/// Starts the translation worker threads, asks the host storage to load every program,
/// waits for the workers and for pending host compilations, and finally rebuilds the
/// cache files when the load reported a problem or a program had to be recompiled.
/// </remarks>
public void LoadShaders()
{
    Thread[] workThreads = new Thread[ThreadCount];

    for (int index = 0; index < ThreadCount; index++)
    {
        workThreads[index] = new Thread(ProcessAsyncQueue)
        {
            Name = $"GPU.AsyncTranslationThread.{index}"
        };
    }

    int programCount = _hostStorage.GetProgramCount();

    _compiledCount = 0;
    _totalCount = programCount;

    _stateChangeCallback(ShaderCacheState.Start, 0, programCount);

    Logger.Info?.Print(LogClass.Gpu, $"Loading {programCount} shaders from the cache...");

    for (int index = 0; index < ThreadCount; index++)
    {
        workThreads[index].Start(_cancellationToken);
    }

    try
    {
        _hostStorage.LoadShaders(_context, this);
    }
    catch (DiskCacheLoadException diskCacheLoadException)
    {
        Logger.Warning?.Print(LogClass.Gpu, $"Error loading the shader cache. {diskCacheLoadException.Message}");

        // If we can't even access the file, then we also can't rebuild.
        if (diskCacheLoadException.Result != DiskCacheLoadResult.NoAccess)
        {
            _needsHostRegen = true;
        }
    }
    catch (InvalidDataException invalidDataException)
    {
        Logger.Warning?.Print(LogClass.Gpu, $"Error decompressing the shader cache file. {invalidDataException.Message}");
        _needsHostRegen = true;
    }
    catch (IOException ioException)
    {
        Logger.Warning?.Print(LogClass.Gpu, $"Error reading the shader cache file. {ioException.Message}");
        _needsHostRegen = true;
    }
    finally
    {
        // Always release the workers, even when an exception that is not handled above
        // escapes: they block on the translation queue until adding is completed, so
        // skipping this would leave the threads waiting forever.
        _asyncTranslationQueue.CompleteAdding();

        for (int index = 0; index < ThreadCount; index++)
        {
            workThreads[index].Join();
        }
    }

    CheckCompilationBlocking();

    if (_needsHostRegen && Active)
    {
        // Rebuild both shared and host cache files.
        // Rebuilding shared is required because the shader information returned by the translator
        // might have changed, and so we have to reconstruct the file with the new information.
        try
        {
            _hostStorage.ClearSharedCache();
            _hostStorage.ClearHostCache(_context);

            if (_programList.Count != 0)
            {
                Logger.Info?.Print(LogClass.Gpu, $"Rebuilding {_programList.Count} shaders...");

                using var streams = _hostStorage.GetOutputStreams(_context);

                foreach (var kv in _programList)
                {
                    if (!Active)
                    {
                        break;
                    }

                    (CachedShaderProgram program, byte[] binaryCode) = kv.Value;
                    _hostStorage.AddShader(_context, program, binaryCode, streams);
                }

                Logger.Info?.Print(LogClass.Gpu, $"Rebuilt {_programList.Count} shaders successfully.");
            }
            else
            {
                // Nothing could be loaded, so the guest cache is removed as well.
                _hostStorage.ClearGuestCache();

                Logger.Info?.Print(LogClass.Gpu, "Shader cache deleted due to corruption.");
            }
        }
        catch (DiskCacheLoadException diskCacheLoadException)
        {
            Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache. {diskCacheLoadException.Message}");
        }
        catch (IOException ioException)
        {
            Logger.Warning?.Print(LogClass.Gpu, $"Error deleting the shader cache file. {ioException.Message}");
        }
    }

    Logger.Info?.Print(LogClass.Gpu, "Shader cache loaded.");

    _stateChangeCallback(ShaderCacheState.Loaded, programCount, programCount);
}
/// <summary>
/// Queues a program created from a host binary so its compilation result can be validated.
/// </summary>
/// <param name="cachedProgram">Cached program</param>
/// <param name="binaryCode">Host binary code</param>
/// <param name="programIndex">Program index</param>
/// <param name="isCompute">Indicates if the program is a compute shader</param>
public void QueueHostProgram(CachedShaderProgram cachedProgram, byte[] binaryCode, int programIndex, bool isCompute)
{
    ProgramEntry hostEntry = new ProgramEntry(cachedProgram, binaryCode, programIndex, isCompute, isBinary: true);

    EnqueueForValidation(hostEntry);
}
/// <summary>
/// Queues a guest program for translation on the worker threads.
/// </summary>
/// <param name="guestShaders">Guest code for each active stage</param>
/// <param name="specState">Specialization state</param>
/// <param name="programIndex">Program index</param>
/// <param name="isCompute">Indicates if the program is a compute shader</param>
public void QueueGuestProgram(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
{
    try
    {
        _asyncTranslationQueue.Add(
            new AsyncProgramTranslation(guestShaders, specState, programIndex, isCompute),
            _cancellationToken);
    }
    catch (OperationCanceledException)
    {
        // Cancellation was requested; the program is dropped without being queued.
    }
}
/// <summary>
/// Checks the state of programs that have already been submitted for compilation,
/// and adds to the cache the ones that compiled successfully.
/// </summary>
public void CheckCompilation()
{
    ProcessCompilationQueue();

    // Walk the queue from the front and stop at the first program that is still compiling,
    // so that this call never blocks waiting for the backend.
    while (_validationQueue.TryPeek(out ProgramEntry pending))
    {
        ProgramLinkStatus status = pending.CachedProgram.HostProgram.CheckProgramLink(false);

        if (status == ProgramLinkStatus.Incomplete)
        {
            break;
        }

        ProcessCompiledProgram(ref pending, status);
        _validationQueue.Dequeue();
    }
}
/// <summary>
/// Waits until all programs finish compiling, then adds the ones
/// that compiled successfully to the cache.
/// </summary>
private void CheckCompilationBlocking()
{
    ProcessCompilationQueue();

    while (_validationQueue.TryDequeue(out ProgramEntry pending) && Active)
    {
        // Blocking link check: waits for the backend to finish this program.
        ProgramLinkStatus status = pending.CachedProgram.HostProgram.CheckProgramLink(true);

        ProcessCompiledProgram(ref pending, status, asyncCompile: false);
    }
}
/// <summary>
/// Handles the compilation result of a program.
/// </summary>
/// <param name="entry">Compiled program entry</param>
/// <param name="result">Compilation result</param>
/// <param name="asyncCompile">For failed host compilations, indicates if the guest recompilation should be queued for the worker threads instead of done immediately</param>
private void ProcessCompiledProgram(ref ProgramEntry entry, ProgramLinkStatus result, bool asyncCompile = true)
{
    if (result == ProgramLinkStatus.Success)
    {
        // Compilation successful, add to memory cache.
        if (!entry.IsCompute)
        {
            _graphicsCache.Add(entry.CachedProgram);
        }
        else
        {
            _computeCache.Add(entry.CachedProgram);
        }

        // A program that was not loaded from a host binary means the cache files are out of date.
        if (!entry.IsBinary)
        {
            _needsHostRegen = true;
        }

        // Fetch the binary code from the backend if it isn't already present.
        byte[] hostBinary = entry.BinaryCode ?? entry.CachedProgram.HostProgram.GetBinary();

        _programList.Add(entry.ProgramIndex, (entry.CachedProgram, hostBinary));

        SignalCompiled();

        return;
    }

    if (!entry.IsBinary)
    {
        // Failed to compile from both host and guest binary.
        ErrorCount++;
        SignalCompiled();

        return;
    }

    // This is a host binary and compilation failed,
    // we still have a chance to recompile from the guest binary.
    CachedShaderProgram failedProgram = entry.CachedProgram;
    CachedShaderStage[] stages = failedProgram.Shaders;

    GuestCodeAndCbData?[] guestShaders = new GuestCodeAndCbData?[stages.Length];

    for (int i = 0; i < stages.Length; i++)
    {
        CachedShaderStage stage = stages[i];

        if (stage != null)
        {
            guestShaders[i] = new GuestCodeAndCbData(stage.Code, stage.Cb1Data);
        }
    }

    if (!asyncCompile)
    {
        RecompileFromGuestCode(guestShaders, failedProgram.SpecializationState, entry.ProgramIndex, entry.IsCompute);
        ProcessCompilationQueue();
    }
    else
    {
        QueueGuestProgram(guestShaders, failedProgram.SpecializationState, entry.ProgramIndex, entry.IsCompute);
    }
}
/// <summary>
/// Drains the queue of translated guest programs, creating a host program for each one
/// and queueing it for validation.
/// </summary>
private void ProcessCompilationQueue()
{
    while (_compilationQueue.TryDequeue(out ProgramCompilation pending) && Active)
    {
        ShaderProgram[] stages = pending.TranslatedStages;
        ShaderSource[] sources = new ShaderSource[stages.Length];

        // Stays at -1 when the program has no fragment stage.
        int fragmentOutputMap = -1;

        for (int i = 0; i < stages.Length; i++)
        {
            ShaderProgram stage = stages[i];

            sources[i] = CreateShaderSource(stage);

            if (stage.Info.Stage == ShaderStage.Fragment)
            {
                fragmentOutputMap = stage.Info.FragmentOutputMap;
            }
        }

        ShaderInfo info;

        if (pending.SpecializationState.PipelineState.HasValue)
        {
            info = new ShaderInfo(fragmentOutputMap, pending.SpecializationState.PipelineState.Value, fromCache: true);
        }
        else
        {
            info = new ShaderInfo(fragmentOutputMap, fromCache: true);
        }

        IProgram hostProgram = _context.Renderer.CreateProgram(sources, info);
        CachedShaderProgram cachedProgram = new CachedShaderProgram(hostProgram, pending.SpecializationState, pending.Shaders);

        // Vulkan's binary code is the SPIR-V used for compilation, so it is ready immediately. Other APIs get this after compilation.
        byte[] binaryCode = null;

        if (_context.Capabilities.Api == TargetApi.Vulkan)
        {
            binaryCode = ShaderBinarySerializer.Pack(sources);
        }

        EnqueueForValidation(new ProgramEntry(cachedProgram, binaryCode, pending.ProgramIndex, pending.IsCompute, isBinary: false));
    }
}
/// <summary>
/// Queues a program for validation, which checks whether it compiled successfully.
/// </summary>
/// <param name="newEntry">Program entry to be validated</param>
private void EnqueueForValidation(ProgramEntry newEntry)
{
    _validationQueue.Enqueue(newEntry);

    // Do not allow more than N shader compilation in-flight, where N is the maximum number of threads
    // the driver will be using for parallel compilation.
    // Submitting more seems to cause NVIDIA OpenGL driver to crash.
    if (_validationQueue.Count < _backendParallelCompileThreads)
    {
        return;
    }

    if (_validationQueue.TryDequeue(out ProgramEntry oldest))
    {
        // Blocking link check on the oldest queued program frees a slot.
        ProgramLinkStatus status = oldest.CachedProgram.HostProgram.CheckProgramLink(true);

        ProcessCompiledProgram(ref oldest, status, asyncCompile: false);
    }
}
/// <summary>
/// Worker thread entry point: processes the queue of programs that should be translated from guest code.
/// </summary>
/// <param name="state">Cancellation token (boxed, as passed to <see cref="Thread.Start(object)"/>)</param>
private void ProcessAsyncQueue(object state)
{
    CancellationToken token = (CancellationToken)state;

    try
    {
        // Blocks until an item is available; ends when adding is completed or the token is cancelled.
        foreach (AsyncProgramTranslation request in _asyncTranslationQueue.GetConsumingEnumerable(token))
        {
            RecompileFromGuestCode(request.GuestShaders, request.SpecializationState, request.ProgramIndex, request.IsCompute);
        }
    }
    catch (OperationCanceledException)
    {
        // Cancellation requested: stop consuming and let the thread exit.
    }
}
/// <summary>
/// Recompiles a program from guest code, dispatching to the compute or graphics path.
/// </summary>
/// <remarks>
/// Any exception thrown by the translation is logged and counted as an error instead of propagating.
/// NOTE(review): this method is also invoked from the worker threads (through ProcessAsyncQueue),
/// and the error counter increment below is not atomic.
/// </remarks>
/// <param name="guestShaders">Guest code for each active stage</param>
/// <param name="specState">Specialization state</param>
/// <param name="programIndex">Program index</param>
/// <param name="isCompute">Indicates if the program is a compute shader</param>
private void RecompileFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex, bool isCompute)
{
    try
    {
        if (!isCompute)
        {
            RecompileGraphicsFromGuestCode(guestShaders, specState, programIndex);
        }
        else
        {
            RecompileComputeFromGuestCode(guestShaders, specState, programIndex);
        }
    }
    catch (Exception translationError)
    {
        Logger.Error?.Print(LogClass.Gpu, $"Error translating guest shader. {translationError.Message}");

        ErrorCount++;
        SignalCompiled();
    }
}
/// <summary>
/// Recompiles a graphics program from guest code.
/// </summary>
/// <remarks>
/// Decodes every present stage from last to first (linking each one to the stage that follows it),
/// translates the stages from first to last, and queues the result for host compilation.
/// </remarks>
/// <param name="guestShaders">Guest code for each active stage</param>
/// <param name="specState">Specialization state</param>
/// <param name="programIndex">Program index</param>
private void RecompileGraphicsFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
{
// New specialization state built from the old graphics state; it is passed to the accessors below
// and stored with the recompiled program.
ShaderSpecializationState newSpecState = new ShaderSpecializationState(
ref specState.GraphicsState,
specState.PipelineState,
specState.TransformFeedbackDescriptors);
// A single ResourceCounts instance is shared by all stage accessors of this program.
ResourceCounts counts = new ResourceCounts();
// Slot 0 holds the "vertex A" program (decoded with TranslationFlags.VertexA below);
// slot N + 1 holds graphics stage N.
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
TranslatorContext nextStage = null;
TargetApi api = _context.Capabilities.Api;
// Decode from the last stage to the first so that each stage can be given the context of the stage after it.
for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
{
if (guestShaders[stageIndex + 1].HasValue)
{
GuestCodeAndCbData shader = guestShaders[stageIndex + 1].Value;
byte[] guestCode = shader.Code;
byte[] cb1Data = shader.Cb1Data;
DiskCacheGpuAccessor gpuAccessor = new DiskCacheGpuAccessor(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex);
TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, 0);
if (nextStage != null)
{
currentStage.SetNextStage(nextStage);
}
// The "vertex A" program is only decoded together with the first stage.
if (stageIndex == 0 && guestShaders[0].HasValue)
{
byte[] guestCodeA = guestShaders[0].Value.Code;
byte[] cb1DataA = guestShaders[0].Value.Cb1Data;
DiskCacheGpuAccessor gpuAccessorA = new DiskCacheGpuAccessor(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0);
translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0);
}
translatorContexts[stageIndex + 1] = currentStage;
nextStage = currentStage;
}
}
if (!_context.Capabilities.SupportsGeometryShader)
{
ShaderCache.TryRemoveGeometryStage(translatorContexts);
}
CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length];
List<ShaderProgram> translatedStages = new List<ShaderProgram>();
TranslatorContext previousStage = null;
// Translate in pipeline order, from the first stage to the last.
for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
{
TranslatorContext currentStage = translatorContexts[stageIndex + 1];
if (currentStage != null)
{
ShaderProgram program;
byte[] guestCode = guestShaders[stageIndex + 1].Value.Code;
byte[] cb1Data = guestShaders[stageIndex + 1].Value.Cb1Data;
if (stageIndex == 0 && guestShaders[0].HasValue)
{
// "Vertex A" is translated together with the first stage into a single program;
// its cache entry keeps the guest code but has no program info.
program = currentStage.Translate(translatorContexts[0]);
byte[] guestCodeA = guestShaders[0].Value.Code;
byte[] cb1DataA = guestShaders[0].Value.Cb1Data;
shaders[0] = new CachedShaderStage(null, guestCodeA, cb1DataA);
shaders[1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
}
else
{
program = currentStage.Translate();
shaders[stageIndex + 1] = new CachedShaderStage(program.Info, guestCode, cb1Data);
}
// NOTE(review): program.Info is already dereferenced above, so a null program would have
// thrown before reaching this check — confirm whether Translate can return null.
if (program != null)
{
translatedStages.Add(program);
}
previousStage = currentStage;
}
// NOTE(review): stage index 3 is presumably the geometry stage, given that a geometry
// passthrough is generated for the missing stage — confirm against the stage ordering.
else if (
previousStage != null &&
previousStage.LayerOutputWritten &&
stageIndex == 3 &&
!_context.Capabilities.SupportsLayerVertexTessellation)
{
translatedStages.Add(previousStage.GenerateGeometryPassthrough());
}
}
_compilationQueue.Enqueue(new ProgramCompilation(translatedStages.ToArray(), shaders, newSpecState, programIndex, isCompute: false));
}
/// <summary>
/// Recompiles a compute program from guest code and queues it for host compilation.
/// </summary>
/// <param name="guestShaders">Guest code for each active stage; only the first element is used</param>
/// <param name="specState">Specialization state</param>
/// <param name="programIndex">Program index</param>
private void RecompileComputeFromGuestCode(GuestCodeAndCbData?[] guestShaders, ShaderSpecializationState specState, int programIndex)
{
    GuestCodeAndCbData guest = guestShaders[0].Value;

    ResourceCounts resourceCounts = new ResourceCounts();
    ShaderSpecializationState recompiledSpecState = new ShaderSpecializationState(ref specState.ComputeState);

    DiskCacheGpuAccessor accessor = new DiskCacheGpuAccessor(
        _context,
        guest.Code,
        guest.Cb1Data,
        specState,
        recompiledSpecState,
        resourceCounts,
        0);

    TranslatorContext decoded = DecodeComputeShader(accessor, _context.Capabilities.Api, 0);
    ShaderProgram translated = decoded.Translate();

    CachedShaderStage[] cachedStages = new CachedShaderStage[]
    {
        new CachedShaderStage(translated.Info, guest.Code, guest.Cb1Data)
    };

    ShaderProgram[] translatedStages = new ShaderProgram[] { translated };

    _compilationQueue.Enqueue(new ProgramCompilation(translatedStages, cachedStages, recompiledSpecState, programIndex, isCompute: true));
}
/// <summary>
/// Signals that compilation of a program has been finished successfully,
/// or that it failed and guest recompilation has also been attempted.
/// </summary>
/// <remarks>
/// This can be reached from the translation worker threads (a failed translation in
/// RecompileFromGuestCode) as well as from the thread running the load, so the counter is
/// incremented atomically to avoid lost updates in the reported progress.
/// </remarks>
private void SignalCompiled()
{
    int compiledCount = Interlocked.Increment(ref _compiledCount);

    _stateChangeCallback(ShaderCacheState.Loading, compiledCount, _totalCount);
}
}
}

View file

@ -0,0 +1,66 @@
using Ryujinx.Common;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.IO;
namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
{
/// <summary>
/// Packs the binary code of shader sources into a single byte array, and unpacks it again.
/// </summary>
static class ShaderBinarySerializer
{
    /// <summary>
    /// Packs shader sources as: source count, then for each source its stage,
    /// binary code length and binary code.
    /// </summary>
    /// <param name="sources">Shader sources to pack</param>
    /// <returns>Packed data</returns>
    public static byte[] Pack(ShaderSource[] sources)
    {
        using MemoryStream packed = MemoryStreamManager.Shared.GetStream();

        packed.Write(sources.Length);

        for (int i = 0; i < sources.Length; i++)
        {
            ShaderSource source = sources[i];

            packed.Write((int)source.Stage);
            packed.Write(source.BinaryCode.Length);
            packed.Write(source.BinaryCode);
        }

        return packed.ToArray();
    }

    /// <summary>
    /// Rebuilds the shader sources from data produced by <see cref="Pack"/>.
    /// </summary>
    /// <param name="stages">Cached stages used to look up the bindings of each source</param>
    /// <param name="code">Packed data</param>
    /// <returns>Unpacked shader sources, all created with the SPIR-V target language</returns>
    public static ShaderSource[] Unpack(CachedShaderStage[] stages, byte[] code)
    {
        using MemoryStream packed = new MemoryStream(code);
        using BinaryReader reader = new BinaryReader(packed);

        List<ShaderSource> sources = new List<ShaderSource>();

        int remaining = reader.ReadInt32();

        while (remaining-- > 0)
        {
            ShaderStage stage = (ShaderStage)reader.ReadInt32();
            int length = reader.ReadInt32();
            byte[] binaryCode = reader.ReadBytes(length);

            ShaderBindings bindings = GetBindings(stages, stage);

            sources.Add(new ShaderSource(binaryCode, bindings, stage, TargetLanguage.Spirv));
        }

        return sources.ToArray();
    }

    /// <summary>
    /// Gets the bindings of the first cached stage whose info matches the given stage.
    /// </summary>
    /// <param name="stages">Cached stages to search</param>
    /// <param name="stage">Stage to look for</param>
    /// <returns>Bindings of the matching stage, or empty bindings when none matches</returns>
    private static ShaderBindings GetBindings(CachedShaderStage[] stages, ShaderStage stage)
    {
        foreach (CachedShaderStage candidate in stages)
        {
            // Entries may be null or carry no info; both are skipped.
            var info = candidate?.Info;

            if (info != null && info.Stage == stage)
            {
                return ShaderCache.GetBindings(info);
            }
        }

        return new ShaderBindings(Array.Empty<int>(), Array.Empty<int>(), Array.Empty<int>(), Array.Empty<int>());
    }
}
}

View file

@ -0,0 +1,297 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Represents a GPU state and memory accessor.
/// </summary>
class GpuAccessor : GpuAccessorBase, IGpuAccessor
{
// Channel used to reach the buffer manager and the memory manager.
private readonly GpuChannel _channel;
// State queried by the Query* methods; it also carries the optional specialization state recorder.
private readonly GpuAccessorState _state;
// Graphics stage index; left at its default of 0 by the compute constructor.
private readonly int _stageIndex;
// True only when created through the compute constructor.
private readonly bool _compute;
// Set by the graphics constructor only; stays false for compute accessors.
private readonly bool _isVulkan;
/// <summary>
/// Creates a GPU state accessor used for graphics shader translation.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="channel">GPU channel</param>
/// <param name="state">Current GPU state</param>
/// <param name="stageIndex">Graphics shader stage index (0 = Vertex, 4 = Fragment)</param>
public GpuAccessor(
    GpuContext context,
    GpuChannel channel,
    GpuAccessorState state,
    int stageIndex) : base(context, state.ResourceCounts, stageIndex)
{
    _channel = channel;
    _state = state;
    _stageIndex = stageIndex;

    // Remembered because the alpha test query only reports a real comparison on Vulkan.
    _isVulkan = context.Capabilities.Api == TargetApi.Vulkan;
}
/// <summary>
/// Creates a GPU state accessor used for compute shader translation.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="channel">GPU channel</param>
/// <param name="state">Current GPU state</param>
public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0)
{
    // Marks this accessor as compute; the stage index keeps its default of 0.
    _compute = true;

    _state = state;
    _channel = channel;
}
/// <inheritdoc/>
public uint ConstantBuffer1Read(int offset)
{
    ulong cb1Address;

    // Constant buffer slot 1 of either the compute or the current graphics stage.
    if (_compute)
    {
        cb1Address = _channel.BufferManager.GetComputeUniformBufferAddress(1);
    }
    else
    {
        cb1Address = _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1);
    }

    return _channel.MemoryManager.Physical.Read<uint>(cb1Address + (ulong)offset);
}
/// <inheritdoc/>
public void Log(string message) =>
    Logger.Warning?.Print(LogClass.Gpu, $"Shader translator: {message}");
/// <inheritdoc/>
public ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize)
{
    // Number of bytes from the address up to the next 0x1000 boundary.
    int bytesToBoundary = 0x1000 - (int)(address & 0xfff);
    int size = Math.Max(minimumSize, bytesToBoundary);

    ReadOnlySpan<byte> bytes = _channel.MemoryManager.GetSpan(address, size);

    return MemoryMarshal.Cast<byte, ulong>(bytes);
}
/// <inheritdoc/>
public bool QueryAlphaToCoverageDitherEnable() =>
    _state.GraphicsState.AlphaToCoverageEnable && _state.GraphicsState.AlphaToCoverageDitherEnable;
/// <inheritdoc/>
public AlphaTestOp QueryAlphaTestCompare()
{
    // A real comparison is only reported on Vulkan with the alpha test enabled.
    if (_isVulkan && _state.GraphicsState.AlphaTestEnable)
    {
        return _state.GraphicsState.AlphaTestCompare switch
        {
            CompareOp.Never or CompareOp.NeverGl => AlphaTestOp.Never,
            CompareOp.Less or CompareOp.LessGl => AlphaTestOp.Less,
            CompareOp.Equal or CompareOp.EqualGl => AlphaTestOp.Equal,
            CompareOp.LessOrEqual or CompareOp.LessOrEqualGl => AlphaTestOp.LessOrEqual,
            CompareOp.Greater or CompareOp.GreaterGl => AlphaTestOp.Greater,
            CompareOp.NotEqual or CompareOp.NotEqualGl => AlphaTestOp.NotEqual,
            CompareOp.GreaterOrEqual or CompareOp.GreaterOrEqualGl => AlphaTestOp.GreaterOrEqual,
            _ => AlphaTestOp.Always
        };
    }

    return AlphaTestOp.Always;
}
/// <inheritdoc/>
public float QueryAlphaTestReference() => _state.GraphicsState.AlphaTestReference;
/// <inheritdoc/>
public AttributeType QueryAttributeType(int location) => _state.GraphicsState.AttributeTypes[location];
/// <inheritdoc/>
public AttributeType QueryFragmentOutputType(int location) => _state.GraphicsState.FragmentOutputTypes[location];
/// <inheritdoc/>
public int QueryComputeLocalSizeX()
{
    return _state.ComputeState.LocalSizeX;
}
/// <inheritdoc/>
public int QueryComputeLocalSizeY()
{
    return _state.ComputeState.LocalSizeY;
}
/// <inheritdoc/>
public int QueryComputeLocalSizeZ()
{
    return _state.ComputeState.LocalSizeZ;
}
/// <inheritdoc/>
public int QueryComputeLocalMemorySize()
{
    return _state.ComputeState.LocalMemorySize;
}
/// <inheritdoc/>
public int QueryComputeSharedMemorySize()
{
    return _state.ComputeState.SharedMemorySize;
}
/// <inheritdoc/>
public uint QueryConstantBufferUse()
{
    uint mask;

    if (_compute)
    {
        mask = _channel.BufferManager.GetComputeUniformBufferUseMask();
    }
    else
    {
        mask = _channel.BufferManager.GetGraphicsUniformBufferUseMask(_stageIndex);
    }

    // Record the queried mask when a specialization state is attached.
    _state.SpecializationState?.RecordConstantBufferUse(_stageIndex, mask);

    return mask;
}
/// <inheritdoc/>
public bool QueryHasConstantBufferDrawParameters() => _state.GraphicsState.HasConstantBufferDrawParameters;
/// <inheritdoc/>
public bool QueryHasUnalignedStorageBuffer() =>
    _state.GraphicsState.HasUnalignedStorageBuffer || _state.ComputeState.HasUnalignedStorageBuffer;
/// <inheritdoc/>
public bool QueryDualSourceBlendEnable() => _state.GraphicsState.DualSourceBlendEnable;
/// <inheritdoc/>
public InputTopology QueryPrimitiveTopology()
{
    // Only recorded if a specialization state is present.
    _state.SpecializationState?.RecordPrimitiveTopology();

    var topology = _state.GraphicsState.Topology;
    var tessellationMode = _state.GraphicsState.TessellationMode;

    return ConvertToInputTopology(topology, tessellationMode);
}
/// <inheritdoc/>
public bool QueryProgramPointSize() => _state.GraphicsState.ProgramPointSizeEnable;
/// <inheritdoc/>
public float QueryPointSize() => _state.GraphicsState.PointSize;
/// <inheritdoc/>
public bool QueryTessCw() => _state.GraphicsState.TessellationMode.UnpackCw();
/// <inheritdoc/>
public TessPatchType QueryTessPatchType() => _state.GraphicsState.TessellationMode.UnpackPatchType();
/// <inheritdoc/>
public TessSpacing QueryTessSpacing() => _state.GraphicsState.TessellationMode.UnpackSpacing();
/// <inheritdoc/>
public TextureFormat QueryTextureFormat(int handle, int cbufSlot)
{
    // Only recorded if a specialization state is present.
    _state.SpecializationState?.RecordTextureFormat(_stageIndex, handle, cbufSlot);

    Image.TextureDescriptor textureDescriptor = GetTextureDescriptor(handle, cbufSlot);

    uint packedFormat = textureDescriptor.UnpackFormat();
    bool isSrgb = textureDescriptor.UnpackSrgb();

    return ConvertToTextureFormat(packedFormat, isSrgb);
}
/// <inheritdoc/>
public SamplerType QuerySamplerType(int handle, int cbufSlot)
{
    // Only recorded if a specialization state is present.
    _state.SpecializationState?.RecordTextureSamplerType(_stageIndex, handle, cbufSlot);

    Image.TextureDescriptor textureDescriptor = GetTextureDescriptor(handle, cbufSlot);

    return textureDescriptor.UnpackTextureTarget().ConvertSamplerType();
}
/// <inheritdoc/>
public bool QueryTextureCoordNormalized(int handle, int cbufSlot)
{
    // Only recorded if a specialization state is present.
    _state.SpecializationState?.RecordTextureCoordNormalized(_stageIndex, handle, cbufSlot);

    Image.TextureDescriptor textureDescriptor = GetTextureDescriptor(handle, cbufSlot);

    return textureDescriptor.UnpackTextureCoordNormalized();
}
/// <summary>
/// Reads a texture descriptor from the texture pool described by the current pool state.
/// </summary>
/// <remarks>
/// The compute and graphics lookups take the same pool arguments; the graphics one
/// additionally receives the stage index of this accessor.
/// </remarks>
/// <param name="handle">Index of the texture (this is the word offset of the handle in the constant buffer)</param>
/// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
/// <returns>Texture descriptor</returns>
private Image.TextureDescriptor GetTextureDescriptor(int handle, int cbufSlot)
{
    var pool = _state.PoolState;

    return _compute
        ? _channel.TextureManager.GetComputeTextureDescriptor(
            pool.TexturePoolGpuVa,
            pool.TextureBufferIndex,
            pool.TexturePoolMaximumId,
            handle,
            cbufSlot)
        : _channel.TextureManager.GetGraphicsTextureDescriptor(
            pool.TexturePoolGpuVa,
            pool.TextureBufferIndex,
            pool.TexturePoolMaximumId,
            _stageIndex,
            handle,
            cbufSlot);
}
/// <inheritdoc/>
public bool QueryTransformDepthMinusOneToOne() => _state.GraphicsState.DepthMode;
/// <inheritdoc/>
public bool QueryTransformFeedbackEnabled() => _state.TransformFeedbackDescriptors is not null;
/// <inheritdoc/>
public ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex) => _state.TransformFeedbackDescriptors[bufferIndex].AsSpan();
/// <inheritdoc/>
public int QueryTransformFeedbackStride(int bufferIndex) => _state.TransformFeedbackDescriptors[bufferIndex].Stride;
/// <inheritdoc/>
public bool QueryEarlyZForce()
{
    // Only recorded if a specialization state is present.
    _state.SpecializationState?.RecordEarlyZForce();

    bool earlyZForce = _state.GraphicsState.EarlyZForce;

    return earlyZForce;
}
/// <inheritdoc/>
public bool QueryViewportTransformDisable() => _state.GraphicsState.ViewportTransformDisable;
/// <inheritdoc/>
public void RegisterTexture(int handle, int cbufSlot)
{
    var specializationState = _state.SpecializationState;

    // Without a specialization state there is nothing to register on, and the descriptor is not read.
    if (specializationState is null)
    {
        return;
    }

    specializationState.RegisterTexture(_stageIndex, handle, cbufSlot, GetTextureDescriptor(handle, cbufSlot));
}
}
}

View file

@ -0,0 +1,238 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// GPU accessor.
/// </summary>
class GpuAccessorBase
{
private readonly GpuContext _context;
private readonly ResourceCounts _resourceCounts;
private readonly int _stageIndex;
/// <summary>
/// Creates a new GPU accessor.
/// </summary>
/// <param name="context">GPU context</param>
/// <param name="resourceCounts">Counters incremented to produce binding numbers when the target API is not Vulkan</param>
/// <param name="stageIndex">Index of the shader stage, used to compute the binding numbers on Vulkan</param>
public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex)
{
    (_context, _resourceCounts, _stageIndex) = (context, resourceCounts, stageIndex);
}
/// <summary>
/// Gets the binding number for a constant buffer.
/// </summary>
/// <remarks>
/// On Vulkan the binding is derived from the stage and <paramref name="index"/>.
/// On other APIs <paramref name="index"/> is ignored and the next value of the uniform buffers counter is returned.
/// </remarks>
/// <param name="index">Constant buffer index</param>
/// <returns>Binding number</returns>
public int QueryBindingConstantBuffer(int index)
{
    if (_context.Capabilities.Api != TargetApi.Vulkan)
    {
        return _resourceCounts.UniformBuffersCount++;
    }

    int binding = GetBindingFromIndex(index, _context.Capabilities.MaximumUniformBuffersPerStage, "Uniform buffer");

    // We need to start counting from 1 since binding 0 is reserved for the support uniform buffer.
    return binding + 1;
}
/// <summary>
/// Gets the binding number for a storage buffer.
/// </summary>
/// <remarks>
/// On Vulkan the binding is derived from the stage and <paramref name="index"/>.
/// On other APIs <paramref name="index"/> is ignored and the next value of the storage buffers counter is returned.
/// </remarks>
/// <param name="index">Storage buffer index</param>
/// <returns>Binding number</returns>
public int QueryBindingStorageBuffer(int index)
{
    if (_context.Capabilities.Api != TargetApi.Vulkan)
    {
        return _resourceCounts.StorageBuffersCount++;
    }

    return GetBindingFromIndex(index, _context.Capabilities.MaximumStorageBuffersPerStage, "Storage buffer");
}
/// <summary>
/// Gets the binding number for a texture.
/// </summary>
/// <remarks>
/// On Vulkan each stage gets twice the per stage texture limit in bindings: buffer textures
/// are placed in the second half. On other APIs both arguments are ignored and the next value
/// of the textures counter is returned.
/// </remarks>
/// <param name="index">Texture index</param>
/// <param name="isBuffer">Indicates if the texture is a buffer texture</param>
/// <returns>Binding number</returns>
public int QueryBindingTexture(int index, bool isBuffer)
{
    if (_context.Capabilities.Api != TargetApi.Vulkan)
    {
        return _resourceCounts.TexturesCount++;
    }

    var maxPerStage = _context.Capabilities.MaximumTexturesPerStage;

    int textureIndex = isBuffer ? index + (int)maxPerStage : index;

    return GetBindingFromIndex(textureIndex, maxPerStage * 2, "Texture");
}
/// <summary>
/// Gets the binding number for an image.
/// </summary>
/// <remarks>
/// On Vulkan each stage gets twice the per stage image limit in bindings: buffer images
/// are placed in the second half. On other APIs both arguments are ignored and the next value
/// of the images counter is returned.
/// </remarks>
/// <param name="index">Image index</param>
/// <param name="isBuffer">Indicates if the image is a buffer image</param>
/// <returns>Binding number</returns>
public int QueryBindingImage(int index, bool isBuffer)
{
    if (_context.Capabilities.Api != TargetApi.Vulkan)
    {
        return _resourceCounts.ImagesCount++;
    }

    var maxPerStage = _context.Capabilities.MaximumImagesPerStage;

    int imageIndex = isBuffer ? index + (int)maxPerStage : index;

    return GetBindingFromIndex(imageIndex, maxPerStage * 2, "Image");
}
/// <summary>
/// Computes a binding number from a resource index and the remapped stage index.
/// </summary>
/// <remarks>
/// An out of range index (negative, or not below <paramref name="maxPerStage"/>) is only logged as an error,
/// the binding is still computed and returned.
/// </remarks>
/// <param name="index">Resource index inside the stage</param>
/// <param name="maxPerStage">Amount of bindings reserved for each stage</param>
/// <param name="resourceName">Resource name used on the error message</param>
/// <returns>Binding number</returns>
private int GetBindingFromIndex(int index, uint maxPerStage, string resourceName)
{
    bool isInRange = (uint)index < maxPerStage;

    if (!isInRange)
    {
        Logger.Error?.Print(LogClass.Gpu, $"{resourceName} index {index} exceeds per stage limit of {maxPerStage}.");
    }

    int stageBase = GetStageIndex() * (int)maxPerStage;

    return stageBase + index;
}
/// <summary>
/// Remaps the stage index of this accessor to the index used to compute binding numbers.
/// </summary>
/// <returns>Remapped stage index</returns>
private int GetStageIndex()
{
    // This is just a simple remapping to ensure that most frequently used shader stages
    // have the lowest binding numbers.
    // This is useful because if we need to run on a system with a low limit on the bindings,
    // then we can still get most games working as the most common shaders will have low binding numbers.
    switch (_stageIndex)
    {
        case 4:
            return 1; // Fragment
        case 3:
            return 2; // Geometry
        case 1:
            return 3; // Tessellation control
        case 2:
            return 4; // Tessellation evaluation
        default:
            return 0; // Vertex/Compute
    }
}
/// <summary>Gets the <c>GatherBiasPrecision</c> value of the host capabilities.</summary>
public int QueryHostGatherBiasPrecision() => _context.Capabilities.GatherBiasPrecision;
/// <summary>Gets the <c>ReduceShaderPrecision</c> value of the host capabilities.</summary>
public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;
/// <summary>Gets the <c>HasFrontFacingBug</c> value of the host capabilities.</summary>
public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;
/// <summary>Gets the <c>HasVectorIndexingBug</c> value of the host capabilities.</summary>
public bool QueryHostHasVectorIndexingBug() => _context.Capabilities.HasVectorIndexingBug;
/// <summary>Gets the <c>StorageBufferOffsetAlignment</c> value of the host capabilities.</summary>
public int QueryHostStorageBufferOffsetAlignment() => _context.Capabilities.StorageBufferOffsetAlignment;
/// <summary>Gets the <c>SupportsBgraFormat</c> value of the host capabilities.</summary>
public bool QueryHostSupportsBgraFormat() => _context.Capabilities.SupportsBgraFormat;
/// <summary>Gets the <c>SupportsFragmentShaderInterlock</c> value of the host capabilities.</summary>
public bool QueryHostSupportsFragmentShaderInterlock() => _context.Capabilities.SupportsFragmentShaderInterlock;
/// <summary>Gets the <c>SupportsFragmentShaderOrderingIntel</c> value of the host capabilities.</summary>
public bool QueryHostSupportsFragmentShaderOrderingIntel() => _context.Capabilities.SupportsFragmentShaderOrderingIntel;
/// <summary>Gets the <c>SupportsGeometryShader</c> value of the host capabilities.</summary>
public bool QueryHostSupportsGeometryShader() => _context.Capabilities.SupportsGeometryShader;
/// <summary>Gets the <c>SupportsGeometryShaderPassthrough</c> value of the host capabilities.</summary>
public bool QueryHostSupportsGeometryShaderPassthrough() => _context.Capabilities.SupportsGeometryShaderPassthrough;
/// <summary>Gets the <c>SupportsImageLoadFormatted</c> value of the host capabilities.</summary>
public bool QueryHostSupportsImageLoadFormatted() => _context.Capabilities.SupportsImageLoadFormatted;
/// <summary>Gets the <c>SupportsLayerVertexTessellation</c> value of the host capabilities.</summary>
public bool QueryHostSupportsLayerVertexTessellation() => _context.Capabilities.SupportsLayerVertexTessellation;
/// <summary>Gets the <c>SupportsNonConstantTextureOffset</c> value of the host capabilities.</summary>
public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset;
/// <summary>Gets the <c>SupportsShaderBallot</c> value of the host capabilities.</summary>
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
/// <summary>Gets the <c>SupportsSnormBufferTextureFormat</c> value of the host capabilities.</summary>
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
/// <summary>Gets the <c>SupportsTextureShadowLod</c> value of the host capabilities.</summary>
public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
/// <summary>Gets the <c>SupportsViewportIndexVertexTessellation</c> value of the host capabilities.</summary>
public bool QueryHostSupportsViewportIndexVertexTessellation() => _context.Capabilities.SupportsViewportIndexVertexTessellation;
/// <summary>Gets the <c>SupportsViewportMask</c> value of the host capabilities.</summary>
public bool QueryHostSupportsViewportMask() => _context.Capabilities.SupportsViewportMask;
/// <summary>
/// Converts a packed Maxwell texture format to the shader translator texture format.
/// </summary>
/// <remarks>
/// <see cref="TextureFormat.Unknown"/> is returned both when the format table lookup fails,
/// and when the format found has no equivalent on the list below.
/// </remarks>
/// <param name="format">Packed maxwell format</param>
/// <param name="formatSrgb">Indicates if the format is sRGB</param>
/// <returns>Shader translator texture format</returns>
protected static TextureFormat ConvertToTextureFormat(uint format, bool formatSrgb)
{
if (!FormatTable.TryGetTextureFormat(format, formatSrgb, out FormatInfo formatInfo))
{
return TextureFormat.Unknown;
}
return formatInfo.Format switch
{
// Single component formats.
Format.R8Unorm => TextureFormat.R8Unorm,
Format.R8Snorm => TextureFormat.R8Snorm,
Format.R8Uint => TextureFormat.R8Uint,
Format.R8Sint => TextureFormat.R8Sint,
Format.R16Float => TextureFormat.R16Float,
Format.R16Unorm => TextureFormat.R16Unorm,
Format.R16Snorm => TextureFormat.R16Snorm,
Format.R16Uint => TextureFormat.R16Uint,
Format.R16Sint => TextureFormat.R16Sint,
Format.R32Float => TextureFormat.R32Float,
Format.R32Uint => TextureFormat.R32Uint,
Format.R32Sint => TextureFormat.R32Sint,
// Two component formats.
Format.R8G8Unorm => TextureFormat.R8G8Unorm,
Format.R8G8Snorm => TextureFormat.R8G8Snorm,
Format.R8G8Uint => TextureFormat.R8G8Uint,
Format.R8G8Sint => TextureFormat.R8G8Sint,
Format.R16G16Float => TextureFormat.R16G16Float,
Format.R16G16Unorm => TextureFormat.R16G16Unorm,
Format.R16G16Snorm => TextureFormat.R16G16Snorm,
Format.R16G16Uint => TextureFormat.R16G16Uint,
Format.R16G16Sint => TextureFormat.R16G16Sint,
Format.R32G32Float => TextureFormat.R32G32Float,
Format.R32G32Uint => TextureFormat.R32G32Uint,
Format.R32G32Sint => TextureFormat.R32G32Sint,
// Four component formats.
Format.R8G8B8A8Unorm => TextureFormat.R8G8B8A8Unorm,
Format.R8G8B8A8Snorm => TextureFormat.R8G8B8A8Snorm,
Format.R8G8B8A8Uint => TextureFormat.R8G8B8A8Uint,
Format.R8G8B8A8Sint => TextureFormat.R8G8B8A8Sint,
// The sRGB variant is mapped to the same shader format as R8G8B8A8Unorm.
Format.R8G8B8A8Srgb => TextureFormat.R8G8B8A8Unorm,
Format.R16G16B16A16Float => TextureFormat.R16G16B16A16Float,
Format.R16G16B16A16Unorm => TextureFormat.R16G16B16A16Unorm,
Format.R16G16B16A16Snorm => TextureFormat.R16G16B16A16Snorm,
Format.R16G16B16A16Uint => TextureFormat.R16G16B16A16Uint,
Format.R16G16B16A16Sint => TextureFormat.R16G16B16A16Sint,
Format.R32G32B32A32Float => TextureFormat.R32G32B32A32Float,
Format.R32G32B32A32Uint => TextureFormat.R32G32B32A32Uint,
Format.R32G32B32A32Sint => TextureFormat.R32G32B32A32Sint,
// Packed formats.
Format.R10G10B10A2Unorm => TextureFormat.R10G10B10A2Unorm,
Format.R10G10B10A2Uint => TextureFormat.R10G10B10A2Uint,
Format.R11G11B10Float => TextureFormat.R11G11B10Float,
// Everything else has no shader translator equivalent.
_ => TextureFormat.Unknown
};
}
/// <summary>
/// Maps a Maxwell primitive topology to the input topology used by the shader translator.
/// </summary>
/// <remarks>
/// For patches, the result depends on the patch type of the tessellation mode.
/// Any topology that is not listed maps to <see cref="InputTopology.Points"/>.
/// </remarks>
/// <param name="topology">Maxwell primitive topology</param>
/// <param name="tessellationMode">Maxwell tessellation mode</param>
/// <returns>Shader translator topology</returns>
protected static InputTopology ConvertToInputTopology(PrimitiveTopology topology, TessMode tessellationMode)
{
    switch (topology)
    {
        case PrimitiveTopology.Points:
            return InputTopology.Points;

        case PrimitiveTopology.Lines:
        case PrimitiveTopology.LineLoop:
        case PrimitiveTopology.LineStrip:
            return InputTopology.Lines;

        case PrimitiveTopology.LinesAdjacency:
        case PrimitiveTopology.LineStripAdjacency:
            return InputTopology.LinesAdjacency;

        case PrimitiveTopology.Triangles:
        case PrimitiveTopology.TriangleStrip:
        case PrimitiveTopology.TriangleFan:
            return InputTopology.Triangles;

        case PrimitiveTopology.TrianglesAdjacency:
        case PrimitiveTopology.TriangleStripAdjacency:
            return InputTopology.TrianglesAdjacency;

        case PrimitiveTopology.Patches:
            // Isoline patches are treated as lines, all other patch types as triangles.
            if (tessellationMode.UnpackPatchType() == TessPatchType.Isolines)
            {
                return InputTopology.Lines;
            }

            return InputTopology.Triangles;

        default:
            return InputTopology.Points;
    }
}
}
}

View file

@ -0,0 +1,61 @@
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Bundle of state objects read by the <see cref="GpuAccessor"/>.
/// </summary>
class GpuAccessorState
{
    /// <summary>
    /// GPU texture pool state.
    /// </summary>
    public readonly GpuChannelPoolState PoolState;

    /// <summary>
    /// GPU compute state, for compute shaders.
    /// </summary>
    public readonly GpuChannelComputeState ComputeState;

    /// <summary>
    /// GPU graphics state, for vertex, tessellation, geometry and fragment shaders.
    /// </summary>
    public readonly GpuChannelGraphicsState GraphicsState;

    /// <summary>
    /// Shader specialization state (shared by all stages).
    /// </summary>
    public readonly ShaderSpecializationState SpecializationState;

    /// <summary>
    /// Transform feedback information, if the shader uses transform feedback. Otherwise, should be null.
    /// </summary>
    public readonly TransformFeedbackDescriptor[] TransformFeedbackDescriptors;

    /// <summary>
    /// Shader resource counts (shared by all stages).
    /// </summary>
    public readonly ResourceCounts ResourceCounts;

    /// <summary>
    /// Creates a new GPU accessor state.
    /// </summary>
    /// <remarks>
    /// A new <see cref="ResourceCounts"/> instance is created for every state.
    /// </remarks>
    /// <param name="poolState">GPU texture pool state</param>
    /// <param name="computeState">GPU compute state, for compute shaders</param>
    /// <param name="graphicsState">GPU graphics state, for vertex, tessellation, geometry and fragment shaders</param>
    /// <param name="specializationState">Shader specialization state (shared by all stages)</param>
    /// <param name="transformFeedbackDescriptors">Transform feedback information, if the shader uses transform feedback. Otherwise, should be null</param>
    public GpuAccessorState(
        GpuChannelPoolState poolState,
        GpuChannelComputeState computeState,
        GpuChannelGraphicsState graphicsState,
        ShaderSpecializationState specializationState,
        TransformFeedbackDescriptor[] transformFeedbackDescriptors = null)
    {
        // Assigned in declaration order.
        PoolState = poolState;
        ComputeState = computeState;
        GraphicsState = graphicsState;
        SpecializationState = specializationState;
        TransformFeedbackDescriptors = transformFeedbackDescriptors;
        ResourceCounts = new();
    }
}
}

View file

@ -0,0 +1,65 @@
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Compute state used by the <see cref="GpuAccessor"/>.
/// </summary>
readonly struct GpuChannelComputeState
{
    // New fields should be added to the end of the struct to keep disk shader cache compatibility.

    /// <summary>
    /// Local group size X of the compute shader.
    /// </summary>
    public readonly int LocalSizeX;

    /// <summary>
    /// Local group size Y of the compute shader.
    /// </summary>
    public readonly int LocalSizeY;

    /// <summary>
    /// Local group size Z of the compute shader.
    /// </summary>
    public readonly int LocalSizeZ;

    /// <summary>
    /// Local memory size of the compute shader.
    /// </summary>
    public readonly int LocalMemorySize;

    /// <summary>
    /// Shared memory size of the compute shader.
    /// </summary>
    public readonly int SharedMemorySize;

    /// <summary>
    /// Indicates that any storage buffer use is unaligned.
    /// </summary>
    public readonly bool HasUnalignedStorageBuffer;

    /// <summary>
    /// Creates a new GPU compute state.
    /// </summary>
    /// <param name="localSizeX">Local group size X of the compute shader</param>
    /// <param name="localSizeY">Local group size Y of the compute shader</param>
    /// <param name="localSizeZ">Local group size Z of the compute shader</param>
    /// <param name="localMemorySize">Local memory size of the compute shader</param>
    /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
    /// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
    public GpuChannelComputeState(
        int localSizeX,
        int localSizeY,
        int localSizeZ,
        int localMemorySize,
        int sharedMemorySize,
        bool hasUnalignedStorageBuffer)
    {
        (LocalSizeX, LocalSizeY, LocalSizeZ) = (localSizeX, localSizeY, localSizeZ);
        (LocalMemorySize, SharedMemorySize) = (localMemorySize, sharedMemorySize);
        HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
    }
}
}

View file

@ -0,0 +1,158 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Shader;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// State used by the <see cref="GpuAccessor"/>.
/// </summary>
struct GpuChannelGraphicsState
{
// New fields should be added to the end of the struct to keep disk shader cache compatibility.
/// <summary>
/// Early Z force enable.
/// </summary>
public bool EarlyZForce;
/// <summary>
/// Primitive topology of current draw.
/// </summary>
public PrimitiveTopology Topology;
/// <summary>
/// Tessellation mode.
/// </summary>
public TessMode TessellationMode;
/// <summary>
/// Indicates whether alpha-to-coverage is enabled.
/// </summary>
public bool AlphaToCoverageEnable;
/// <summary>
/// Indicates whether alpha-to-coverage dithering is enabled.
/// </summary>
public bool AlphaToCoverageDitherEnable;
/// <summary>
/// Indicates whether the viewport transform is disabled.
/// </summary>
public bool ViewportTransformDisable;
/// <summary>
/// Depth mode zero to one or minus one to one.
/// </summary>
public bool DepthMode;
/// <summary>
/// Indicates if the point size is set on the shader or is fixed.
/// </summary>
public bool ProgramPointSizeEnable;
/// <summary>
/// Point size used if <see cref="ProgramPointSizeEnable" /> is false.
/// </summary>
public float PointSize;
/// <summary>
/// Indicates whether alpha test is enabled.
/// </summary>
public bool AlphaTestEnable;
/// <summary>
/// When alpha test is enabled, indicates the comparison that decides if the fragment should be discarded.
/// </summary>
public CompareOp AlphaTestCompare;
/// <summary>
/// When alpha test is enabled, indicates the value to compare with the fragment output alpha.
/// </summary>
public float AlphaTestReference;
/// <summary>
/// Type of the vertex attributes consumed by the shader.
/// </summary>
public Array32<AttributeType> AttributeTypes;
/// <summary>
/// Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0.
/// </summary>
public bool HasConstantBufferDrawParameters;
/// <summary>
/// Indicates that any storage buffer use is unaligned.
/// </summary>
public bool HasUnalignedStorageBuffer;
/// <summary>
/// Type of the fragment shader outputs.
/// </summary>
public Array8<AttributeType> FragmentOutputTypes;
/// <summary>
/// Indicates whether dual source blend is enabled.
/// </summary>
public bool DualSourceBlendEnable;
/// <summary>
/// Creates a new GPU graphics state.
/// </summary>
/// <param name="earlyZForce">Early Z force enable</param>
/// <param name="topology">Primitive topology</param>
/// <param name="tessellationMode">Tessellation mode</param>
/// <param name="alphaToCoverageEnable">Indicates whether alpha-to-coverage is enabled</param>
/// <param name="alphaToCoverageDitherEnable">Indicates whether alpha-to-coverage dithering is enabled</param>
/// <param name="viewportTransformDisable">Indicates whether the viewport transform is disabled</param>
/// <param name="depthMode">Depth mode zero to one or minus one to one</param>
/// <param name="programPointSizeEnable">Indicates if the point size is set on the shader or is fixed</param>
/// <param name="pointSize">Point size if not set from shader</param>
/// <param name="alphaTestEnable">Indicates whether alpha test is enabled</param>
/// <param name="alphaTestCompare">When alpha test is enabled, indicates the comparison that decides if the fragment should be discarded</param>
/// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
/// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
/// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
/// <param name="fragmentOutputTypes">Type of the fragment shader outputs</param>
/// <param name="dualSourceBlendEnable">Indicates whether dual source blend is enabled</param>
public GpuChannelGraphicsState(
bool earlyZForce,
PrimitiveTopology topology,
TessMode tessellationMode,
bool alphaToCoverageEnable,
bool alphaToCoverageDitherEnable,
bool viewportTransformDisable,
bool depthMode,
bool programPointSizeEnable,
float pointSize,
bool alphaTestEnable,
CompareOp alphaTestCompare,
float alphaTestReference,
ref Array32<AttributeType> attributeTypes,
bool hasConstantBufferDrawParameters,
bool hasUnalignedStorageBuffer,
ref Array8<AttributeType> fragmentOutputTypes,
bool dualSourceBlendEnable)
{
EarlyZForce = earlyZForce;
Topology = topology;
TessellationMode = tessellationMode;
AlphaToCoverageEnable = alphaToCoverageEnable;
AlphaToCoverageDitherEnable = alphaToCoverageDitherEnable;
ViewportTransformDisable = viewportTransformDisable;
DepthMode = depthMode;
ProgramPointSizeEnable = programPointSizeEnable;
PointSize = pointSize;
AlphaTestEnable = alphaTestEnable;
AlphaTestCompare = alphaTestCompare;
AlphaTestReference = alphaTestReference;
// The arrays are received by reference, but copied by value into the fields.
AttributeTypes = attributeTypes;
HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
FragmentOutputTypes = fragmentOutputTypes;
DualSourceBlendEnable = dualSourceBlendEnable;
}
}
}

View file

@ -0,0 +1,50 @@
using System;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// State used by the <see cref="GpuAccessor"/>.
/// </summary>
readonly struct GpuChannelPoolState : IEquatable<GpuChannelPoolState>
{
    /// <summary>
    /// GPU virtual address of the texture pool.
    /// </summary>
    public readonly ulong TexturePoolGpuVa;

    /// <summary>
    /// Maximum ID of the texture pool.
    /// </summary>
    public readonly int TexturePoolMaximumId;

    /// <summary>
    /// Constant buffer slot where the texture handles are located.
    /// </summary>
    public readonly int TextureBufferIndex;

    /// <summary>
    /// Creates a new GPU texture pool state.
    /// </summary>
    /// <param name="texturePoolGpuVa">GPU virtual address of the texture pool</param>
    /// <param name="texturePoolMaximumId">Maximum ID of the texture pool</param>
    /// <param name="textureBufferIndex">Constant buffer slot where the texture handles are located</param>
    public GpuChannelPoolState(ulong texturePoolGpuVa, int texturePoolMaximumId, int textureBufferIndex)
    {
        TexturePoolGpuVa = texturePoolGpuVa;
        TexturePoolMaximumId = texturePoolMaximumId;
        TextureBufferIndex = textureBufferIndex;
    }

    /// <summary>
    /// Check if the pool states are equal.
    /// </summary>
    /// <param name="other">Pool state to compare with</param>
    /// <returns>True if they are equal, false otherwise</returns>
    public bool Equals(GpuChannelPoolState other)
    {
        return TexturePoolGpuVa == other.TexturePoolGpuVa &&
            TexturePoolMaximumId == other.TexturePoolMaximumId &&
            TextureBufferIndex == other.TextureBufferIndex;
    }

    /// <summary>
    /// Check if the given object is a pool state equal to this one.
    /// </summary>
    /// <remarks>
    /// Overridden so that comparisons going through <see cref="object"/> use the same
    /// field comparison as <see cref="Equals(GpuChannelPoolState)"/>.
    /// </remarks>
    /// <param name="obj">Object to compare with</param>
    /// <returns>True if <paramref name="obj"/> is a pool state equal to this one, false otherwise</returns>
    public override bool Equals(object obj)
    {
        return obj is GpuChannelPoolState other && Equals(other);
    }

    /// <summary>
    /// Gets a hash code computed from all the fields compared by <see cref="Equals(GpuChannelPoolState)"/>.
    /// </summary>
    /// <returns>Hash code of the pool state</returns>
    public override int GetHashCode()
    {
        return HashCode.Combine(TexturePoolGpuVa, TexturePoolMaximumId, TextureBufferIndex);
    }
}
}

View file

@ -0,0 +1,113 @@
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader.HashTable
{
/// <summary>
/// State of a hash calculation.
/// </summary>
struct HashState
{
    // This is using a slightly modified implementation of FastHash64.
    // Reference: https://github.com/ztanml/fast-hash/blob/master/fasthash.c
    private const ulong M = 0x880355f21e6d1965UL;

    private ulong _hash;

    // Amount of bytes, from the start of the data, that were already consumed by Continue.
    // Always a multiple of 8.
    private int _start;

    /// <summary>
    /// One shot hash calculation for a given data.
    /// </summary>
    /// <param name="data">Data to be hashed</param>
    /// <returns>Hash of the given data</returns>
    public static uint CalcHash(ReadOnlySpan<byte> data)
    {
        HashState state = new HashState();

        state.Initialize();
        state.Continue(data);
        return state.Finalize(data);
    }

    /// <summary>
    /// Initializes the hash state.
    /// </summary>
    /// <remarks>
    /// This also discards the progress of any previous calculation, so the same
    /// state can be used to start hashing different data from its first byte.
    /// </remarks>
    public void Initialize()
    {
        _hash = 23;

        // Without this, a state that was already used would skip the first bytes of the new data.
        _start = 0;
    }

    /// <summary>
    /// Calculates the hash of the given data.
    /// </summary>
    /// <remarks>
    /// The full data must be passed on <paramref name="data"/>.
    /// If this is not the first time the method is called, then <paramref name="data"/> must start with the data passed on the last call.
    /// If a smaller slice of the data was already hashed before, only the additional data will be hashed.
    /// This can be used for additive hashing of data in chunks.
    /// Only whole groups of 8 bytes are consumed here, the remaining bytes are handled by <see cref="Finalize"/>.
    /// </remarks>
    /// <param name="data">Data to be hashed</param>
    public void Continue(ReadOnlySpan<byte> data)
    {
        ulong h = _hash;

        // The cast drops the trailing bytes that do not fill a whole ulong.
        ReadOnlySpan<ulong> dataAsUlong = MemoryMarshal.Cast<byte, ulong>(data.Slice(_start));

        for (int i = 0; i < dataAsUlong.Length; i++)
        {
            ulong value = dataAsUlong[i];

            h ^= Mix(value);
            h *= M;
        }

        _hash = h;
        _start = data.Length & ~7;
    }

    /// <summary>
    /// Performs the hash finalization step, and returns the calculated hash.
    /// </summary>
    /// <remarks>
    /// The full data must be passed on <paramref name="data"/>.
    /// <paramref name="data"/> must start with the data passed on the last call to <see cref="Continue"/>.
    /// No internal state is changed, so one can still continue hashing data with <see cref="Continue"/>
    /// after calling this method.
    /// </remarks>
    /// <param name="data">Data to be hashed</param>
    /// <returns>Hash of all the data hashed with this <see cref="HashState"/></returns>
    public uint Finalize(ReadOnlySpan<byte> data)
    {
        ulong h = _hash;

        int remainder = data.Length & 7;
        if (remainder != 0)
        {
            ulong v = 0;

            for (int i = data.Length - remainder; i < data.Length; i++)
            {
                // NOTE: The shift count can be negative or above 63 here. C# only uses the low 6 bits
                // of the count for 64-bit operands, so each trailing byte still gets its own 8-bit lane.
                // The expression is kept as is, since changing it would change the produced hashes.
                v |= (ulong)data[i] << ((i - remainder) * 8);
            }

            h ^= Mix(v);
            h *= M;
        }

        h = Mix(h);
        return (uint)(h - (h >> 32));
    }

    /// <summary>
    /// Hash mix function.
    /// </summary>
    /// <param name="h">Hash to mix</param>
    /// <returns>Mixed hash</returns>
    private static ulong Mix(ulong h)
    {
        h ^= h >> 23;
        h *= 0x2127599bf4325c37UL;
        h ^= h >> 47;
        return h;
    }
}
}

View file

@ -0,0 +1,27 @@
using System;
namespace Ryujinx.Graphics.Gpu.Shader.HashTable
{
/// <summary>
/// Data accessor, used by <see cref="PartitionedHashTable{T}"/> to access data of unknown length.
/// </summary>
/// <remarks>
/// This will be used to access chunks of data and try finding a match on the table.
/// This is necessary because the data size is assumed to be unknown, and so the
/// hash table must try to "guess" the size of the data based on the entries on the table.
/// </remarks>
public interface IDataAccessor
{
/// <summary>
/// Gets a span of shader code at the specified offset, with at most the specified size.
/// </summary>
/// <remarks>
/// This might return a span smaller than the requested <paramref name="length"/> if there's
/// no more code available.
/// </remarks>
/// <param name="offset">Offset in shader code</param>
/// <param name="length">Size in bytes</param>
/// <returns>Shader code span</returns>
ReadOnlySpan<byte> GetSpan(int offset, int length);
}
}

View file

@ -0,0 +1,451 @@
using System;
using System.Collections.Generic;
using System.Numerics;
namespace Ryujinx.Graphics.Gpu.Shader.HashTable
{
/// <summary>
/// Partitioned hash table.
/// </summary>
/// <typeparam name="T">Hash table entry type</typeparam>
class PartitionHashTable<T>
{
/// <summary>
/// Hash table entry.
/// </summary>
private struct Entry
{
    /// <summary>
    /// Hash of the data returned by <see cref="GetData"/>.
    /// </summary>
    public readonly uint Hash;

    /// <summary>
    /// If this entry is only a sub-region of <see cref="Data"/>, this indicates the size in bytes
    /// of that region. Otherwise, it should be zero.
    /// </summary>
    public readonly int OwnSize;

    /// <summary>
    /// Data used to compute the hash for this entry.
    /// </summary>
    /// <remarks>
    /// To avoid additional allocations, this might be an instance of the full entry data,
    /// and only a sub-region of it might be actually used by this entry. Such sub-region
    /// has its size indicated by <see cref="OwnSize"/> in this case.
    /// </remarks>
    public readonly byte[] Data;

    /// <summary>
    /// Item associated with this entry.
    /// </summary>
    public T Item;

    /// <summary>
    /// Indicates if the entry is partial, which means that this entry is only for a sub-region of the data.
    /// </summary>
    /// <remarks>
    /// Partial entries have no items associated with them. They just indicate that the data might be present on
    /// the table, and one must keep looking for the full entry on other tables of larger data size.
    /// </remarks>
    public bool IsPartial
    {
        get { return OwnSize != 0; }
    }

    /// <summary>
    /// Creates a new partial hash table entry.
    /// </summary>
    /// <param name="hash">Hash of the data</param>
    /// <param name="ownerData">Full data</param>
    /// <param name="ownSize">Size of the sub-region of data that belongs to this entry</param>
    public Entry(uint hash, byte[] ownerData, int ownSize)
    {
        Data = ownerData;
        OwnSize = ownSize;
        Hash = hash;

        // Partial entries carry no item.
        Item = default;
    }

    /// <summary>
    /// Creates a new full hash table entry.
    /// </summary>
    /// <param name="hash">Hash of the data</param>
    /// <param name="data">Data</param>
    /// <param name="item">Item associated with this entry</param>
    public Entry(uint hash, byte[] data, T item)
    {
        Data = data;

        // A size of zero is what marks the entry as a full entry.
        OwnSize = 0;
        Hash = hash;
        Item = item;
    }

    /// <summary>
    /// Gets the data for this entry, either full or partial.
    /// </summary>
    /// <returns>Data sub-region</returns>
    public ReadOnlySpan<byte> GetData()
    {
        return IsPartial
            ? new ReadOnlySpan<byte>(Data).Slice(0, OwnSize)
            : Data;
    }
}
/// <summary>
/// Hash table bucket.
/// </summary>
/// <remarks>
/// A bucket is considered to be empty while the data of <see cref="InlineEntry"/> is null.
/// </remarks>
private struct Bucket
{
/// <summary>
/// Inline entry, to avoid allocations for the common single entry case.
/// </summary>
public Entry InlineEntry;
/// <summary>
/// List of additional entries for the not-so-common multiple entries case.
/// Null until a second entry is added to the bucket.
/// </summary>
public List<Entry> MoreEntries;
}
private Bucket[] _buckets;
private int _count;
/// <summary>
/// Total amount of entries on the hash table.
/// </summary>
public int Count
{
    get { return _count; }
}
/// <summary>
/// Creates a new instance of the partitioned hash table.
/// </summary>
/// <remarks>
/// The table starts without any bucket.
/// </remarks>
public PartitionHashTable() => _buckets = Array.Empty<Bucket>();
/// <summary>
/// Gets the item associated with the data, adding the supplied one if the table has none.
/// </summary>
/// <param name="data">Data</param>
/// <param name="dataHash">Hash of the data</param>
/// <param name="item">Item to be added if not found</param>
/// <returns>Existing item if found, or <paramref name="item"/> if not found</returns>
public T GetOrAdd(byte[] data, uint dataHash, T item)
{
    if (!TryFindItem(dataHash, data, out T existingItem))
    {
        Entry newEntry = new Entry(dataHash, data, item);

        AddToBucket(dataHash, ref newEntry);

        return item;
    }

    return existingItem;
}
/// <summary>
/// Adds an item to the hash table, unless the data already has an item associated with it.
/// </summary>
/// <param name="data">Data</param>
/// <param name="dataHash">Hash of the data</param>
/// <param name="item">Item to be added</param>
/// <returns>True if the item was added, false due to an item associated with the data already being on the table</returns>
public bool Add(byte[] data, uint dataHash, T item)
{
    bool alreadyPresent = TryFindItem(dataHash, data, out _);

    if (!alreadyPresent)
    {
        Entry newEntry = new Entry(dataHash, data, item);

        AddToBucket(dataHash, ref newEntry);
    }

    return !alreadyPresent;
}
/// <summary>
/// Adds a partial entry to the hash table, hashing the sub-region of the data first.
/// </summary>
/// <param name="ownerData">Full data</param>
/// <param name="ownSize">Size of the sub-region of <paramref name="ownerData"/> used by the partial entry</param>
/// <returns>True if added, false otherwise</returns>
public bool AddPartial(byte[] ownerData, int ownSize)
{
    uint ownHash = HashState.CalcHash(new ReadOnlySpan<byte>(ownerData).Slice(0, ownSize));

    return AddPartial(ownerData, ownHash, ownSize);
}
/// <summary>
/// Adds a partial entry to the hash table, using a hash that was already calculated.
/// </summary>
/// <remarks>
/// Nothing is added if the lookup for the sub-region of the data succeeds.
/// </remarks>
/// <param name="ownerData">Full data</param>
/// <param name="dataHash">Hash of the data sub-region</param>
/// <param name="ownSize">Size of the sub-region of <paramref name="ownerData"/> used by the partial entry</param>
/// <returns>True if added, false otherwise</returns>
public bool AddPartial(byte[] ownerData, uint dataHash, int ownSize)
{
    ReadOnlySpan<byte> ownData = new ReadOnlySpan<byte>(ownerData).Slice(0, ownSize);

    bool alreadyPresent = TryFindItem(dataHash, ownData, out _);

    if (!alreadyPresent)
    {
        Entry partialEntry = new Entry(dataHash, ownerData, ownSize);

        AddToBucket(dataHash, ref partialEntry);
    }

    return !alreadyPresent;
}
/// <summary>
/// Adds entry with a given hash to the table.
/// </summary>
/// <remarks>
/// The entry count is incremented first, and the buckets are rebuilt whenever the
/// bucket count calculated for the new entry count differs from the current one.
/// </remarks>
/// <param name="dataHash">Hash of the entry</param>
/// <param name="entry">Entry</param>
private void AddToBucket(uint dataHash, ref Entry entry)
{
    _count++;

    int requiredBuckets = GetPow2Count(_count);

    if (_buckets.Length != requiredBuckets)
    {
        Rebuild(requiredBuckets);
    }

    // The bucket must be looked up after the rebuild.
    AddToBucket(ref GetBucketForHash(dataHash), ref entry);
}
/// <summary>
/// Adds an entry to a bucket.
/// </summary>
/// <remarks>
/// The first entry of a bucket is stored inline; any further entries go to the overflow list,
/// which is only allocated once it is needed.
/// </remarks>
/// <param name="bucket">Bucket to add the entry into</param>
/// <param name="entry">Entry to be added</param>
private void AddToBucket(ref Bucket bucket, ref Entry entry)
{
    if (bucket.InlineEntry.Data != null)
    {
        // Inline slot is taken, spill into the overflow list.
        bucket.MoreEntries ??= new List<Entry>();
        bucket.MoreEntries.Add(entry);
        return;
    }

    bucket.InlineEntry = entry;
}
/// <summary>
/// Creates partial entries on a new hash table for every full entry of this table.
/// </summary>
/// <remarks>
/// This should be called every time a new hash table is created, and there are hash
/// tables with data sizes that are higher than that of the new table.
/// The new hash table is then filled with "partial" entries that refer to the full entries
/// of the tables with higher size. Entries that are themselves partial are skipped.
/// </remarks>
/// <param name="newTable">New hash table</param>
/// <param name="newEntrySize">Size of the data on the new hash table</param>
public void FillPartials(PartitionHashTable<T> newTable, int newEntrySize)
{
    for (int bucketIndex = 0; bucketIndex < _buckets.Length; bucketIndex++)
    {
        ref Bucket current = ref _buckets[bucketIndex];

        // The overflow list is only inspected for buckets whose inline slot is in use.
        if (current.InlineEntry.Data == null)
        {
            continue;
        }

        if (!current.InlineEntry.IsPartial)
        {
            newTable.AddPartial(current.InlineEntry.Data, newEntrySize);
        }

        if (current.MoreEntries == null)
        {
            continue;
        }

        foreach (Entry overflowEntry in current.MoreEntries)
        {
            if (!overflowEntry.IsPartial)
            {
                newTable.AddPartial(overflowEntry.Data, newEntrySize);
            }
        }
    }
}
/// <summary>
/// Looks up the item stored for a given piece of data.
/// </summary>
/// <param name="dataHash">Hash of <paramref name="data"/></param>
/// <param name="data">Data to find</param>
/// <param name="item">Item associated with the data, or the default value when nothing matches</param>
/// <returns>True if an entry with the same hash and data was found, false otherwise</returns>
private bool TryFindItem(uint dataHash, ReadOnlySpan<byte> data, out T item)
{
    item = default;

    if (_count == 0)
    {
        return false;
    }

    ref Bucket bucket = ref GetBucketForHash(dataHash);
    ref Entry first = ref bucket.InlineEntry;

    // Overflow entries are only considered when the inline slot is occupied.
    if (first.Data == null)
    {
        return false;
    }

    if (first.Hash == dataHash && first.GetData().SequenceEqual(data))
    {
        item = first.Item;
        return true;
    }

    if (bucket.MoreEntries == null)
    {
        return false;
    }

    foreach (Entry candidate in bucket.MoreEntries)
    {
        // Cheap hash comparison first, full data comparison only on a hash match.
        if (candidate.Hash == dataHash && candidate.GetData().SequenceEqual(data))
        {
            item = candidate.Item;
            return true;
        }
    }

    return false;
}
/// <summary>
/// Indicates the result of a lookup on a single hash table.
/// The value tells the caller whether the search is over, or in which direction
/// (tables of lower or higher data size) it has to continue.
/// </summary>
public enum SearchResult
{
/// <summary>
/// No entry was found, the search must continue on hash tables of lower size.
/// </summary>
NotFound,
/// <summary>
/// A partial entry was found, the search must continue on hash tables of higher size.
/// </summary>
FoundPartial,
/// <summary>
/// A full entry was found, the search was concluded and the item can be retrieved.
/// </summary>
FoundFull
}
/// <summary>
/// Looks up an entry whose data matches the first <paramref name="size"/> bytes provided by the accessor.
/// </summary>
/// <param name="dataAccessor">Data accessor</param>
/// <param name="size">Size of the hash table data</param>
/// <param name="item">The item on the table, if found, otherwise unmodified</param>
/// <param name="data">The data on the table, if found, otherwise unmodified</param>
/// <returns>Table lookup result</returns>
public SearchResult TryFindItem(scoped ref SmartDataAccessor dataAccessor, int size, scoped ref T item, scoped ref byte[] data)
{
    if (_count == 0)
    {
        return SearchResult.NotFound;
    }

    ReadOnlySpan<byte> wanted = dataAccessor.GetSpanAndHash(size, out uint wantedHash);

    // The accessor may return less data than requested, in which case nothing here can match.
    if (wanted.Length != size)
    {
        return SearchResult.NotFound;
    }

    ref Bucket bucket = ref GetBucketForHash(wantedHash);
    ref Entry first = ref bucket.InlineEntry;

    if (first.Data == null)
    {
        return SearchResult.NotFound;
    }

    if (first.Hash == wantedHash && first.GetData().SequenceEqual(wanted))
    {
        item = first.Item;
        data = first.Data;

        return first.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull;
    }

    if (bucket.MoreEntries != null)
    {
        foreach (Entry candidate in bucket.MoreEntries)
        {
            if (candidate.Hash != wantedHash || !candidate.GetData().SequenceEqual(wanted))
            {
                continue;
            }

            item = candidate.Item;
            data = candidate.Data;

            return candidate.IsPartial ? SearchResult.FoundPartial : SearchResult.FoundFull;
        }
    }

    return SearchResult.NotFound;
}
/// <summary>
/// Rebuilds the table with a new bucket count, redistributing all existing entries.
/// </summary>
/// <param name="newPow2Count">New power of two count of the table</param>
private void Rebuild(int newPow2Count)
{
    Bucket[] rebuilt = new Bucket[newPow2Count];

    // The count is a power of two, so count - 1 selects the low bits of the hash as the bucket index.
    uint indexMask = (uint)newPow2Count - 1;

    for (int oldIndex = 0; oldIndex < _buckets.Length; oldIndex++)
    {
        ref Bucket old = ref _buckets[oldIndex];

        if (old.InlineEntry.Data == null)
        {
            continue;
        }

        AddToBucket(ref rebuilt[(int)(old.InlineEntry.Hash & indexMask)], ref old.InlineEntry);

        if (old.MoreEntries == null)
        {
            continue;
        }

        foreach (Entry overflowEntry in old.MoreEntries)
        {
            // The iteration variable cannot be passed by reference, so work on a copy.
            Entry movedEntry = overflowEntry;
            AddToBucket(ref rebuilt[(int)(overflowEntry.Hash & indexMask)], ref movedEntry);
        }
    }

    _buckets = rebuilt;
}
/// <summary>
/// Gets a reference to the bucket that a given hash maps to.
/// </summary>
/// <param name="hash">Data hash</param>
/// <returns>Bucket for the hash</returns>
private ref Bucket GetBucketForHash(uint hash)
{
    // The bucket index is the hash masked with (bucket count - 1).
    return ref _buckets[(int)(hash & (_buckets.Length - 1))];
}
/// <summary>
/// Gets a power of two count from a regular count.
/// </summary>
/// <param name="count">Count</param>
/// <returns>Largest power of two that is less than or equal to <paramref name="count"/></returns>
private static int GetPow2Count(int count)
{
    // Rounding down (rather than up) keeps the bucket array small;
    // this was done to optimize memory usage rather than performance.
    int exponent = BitOperations.Log2((uint)count);

    return 1 << exponent;
}
}
}

View file

@ -0,0 +1,244 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Ryujinx.Graphics.Gpu.Shader.HashTable
{
/// <summary>
/// Partitioned hash table.
/// </summary>
/// <typeparam name="T"></typeparam>
public class PartitionedHashTable<T>
{
/// <summary>
/// Pairs a data size with the hash table that stores all entries of that size.
/// </summary>
private readonly struct SizeEntry
{
    private readonly PartitionHashTable<T> _table;

    /// <summary>
    /// Size for the data that will be stored on the hash table on this entry.
    /// </summary>
    public int Size { get; }

    /// <summary>
    /// Number of entries on the hash table.
    /// </summary>
    public int TableCount => _table.Count;

    /// <summary>
    /// Creates an entry, with an empty hash table, for a given size.
    /// </summary>
    /// <param name="size">Size of the data to be stored on this entry</param>
    public SizeEntry(int size)
    {
        Size = size;
        _table = new PartitionHashTable<T>();
    }

    /// <summary>
    /// Gets an item for existing data, or adds a new one.
    /// </summary>
    /// <param name="data">Data associated with the item, expected to be exactly <see cref="Size"/> bytes long</param>
    /// <param name="dataHash">Hash of <paramref name="data"/></param>
    /// <param name="item">Item to be added</param>
    /// <returns>Existing item, or <paramref name="item"/> if not present</returns>
    public T GetOrAdd(byte[] data, uint dataHash, T item)
    {
        Debug.Assert(data.Length == Size);

        return _table.GetOrAdd(data, dataHash, item);
    }

    /// <summary>
    /// Adds a new item.
    /// </summary>
    /// <param name="data">Data associated with the item, expected to be exactly <see cref="Size"/> bytes long</param>
    /// <param name="dataHash">Hash of <paramref name="data"/></param>
    /// <param name="item">Item to be added</param>
    /// <returns>True if added, false otherwise</returns>
    public bool Add(byte[] data, uint dataHash, T item)
    {
        Debug.Assert(data.Length == Size);

        return _table.Add(data, dataHash, item);
    }

    /// <summary>
    /// Adds a partial entry covering the first <see cref="Size"/> bytes of a larger entry.
    /// </summary>
    /// <param name="ownerData">Full entry data</param>
    /// <param name="dataHash">Hash of the sub-region of the data that belongs to this entry</param>
    /// <returns>True if added, false otherwise</returns>
    public bool AddPartial(byte[] ownerData, uint dataHash) => _table.AddPartial(ownerData, dataHash, Size);

    /// <summary>
    /// Fills a new hash table with "partials" of existing full entries of higher size.
    /// </summary>
    /// <param name="newEntry">Entry with the new hash table, which must have a smaller size than this one</param>
    public void FillPartials(SizeEntry newEntry)
    {
        Debug.Assert(newEntry.Size < Size);

        _table.FillPartials(newEntry._table, newEntry.Size);
    }

    /// <summary>
    /// Tries to find an item on the hash table.
    /// </summary>
    /// <param name="dataAccessor">Data accessor</param>
    /// <param name="item">The item on the table, if found, otherwise unmodified</param>
    /// <param name="data">The data on the table, if found, otherwise unmodified</param>
    /// <returns>Table lookup result</returns>
    public PartitionHashTable<T>.SearchResult TryFindItem(scoped ref SmartDataAccessor dataAccessor, scoped ref T item, scoped ref byte[] data)
        => _table.TryFindItem(ref dataAccessor, Size, ref item, ref data);
}
// One entry per distinct data size; GetOrAdd inserts new sizes at their sorted position.
private readonly List<SizeEntry> _sizeTable = new List<SizeEntry>();

/// <summary>
/// Creates a new, empty partitioned hash table.
/// </summary>
public PartitionedHashTable()
{
}
/// <summary>
/// Adds a new item to the table. Nothing is added if the data is already present.
/// </summary>
/// <param name="data">Data</param>
/// <param name="item">Item associated with the data</param>
public void Add(byte[] data, T item)
{
    // Same as GetOrAdd, the resulting item is simply not needed here.
    _ = GetOrAdd(data, item);
}
/// <summary>
/// Gets an existing item from the table, or adds a new one if not present.
/// </summary>
/// <remarks>
/// If no hash table exists yet for the length of <paramref name="data"/>, one is created and inserted
/// on the size list, and "partial" entries are registered so that lookups can navigate between sizes.
/// </remarks>
/// <param name="data">Data</param>
/// <param name="item">Item associated with the data</param>
/// <returns>Existing item, or <paramref name="item"/> if not present</returns>
public T GetOrAdd(byte[] data, T item)
{
SizeEntry sizeEntry;
int index = BinarySearch(_sizeTable, data.Length);
if (index < _sizeTable.Count && _sizeTable[index].Size == data.Length)
{
// A table for this exact data size already exists.
sizeEntry = _sizeTable[index];
}
else
{
// BinarySearch returns the last probed position; move past it if that table
// holds smaller data, so the new table is inserted after it.
if (index < _sizeTable.Count && _sizeTable[index].Size < data.Length)
{
index++;
}
sizeEntry = new SizeEntry(data.Length);
_sizeTable.Insert(index, sizeEntry);
// Tables after the insertion point are asked to publish partials of their
// full entries on the new table (FillPartials asserts they have a higher size).
for (int i = index + 1; i < _sizeTable.Count; i++)
{
_sizeTable[i].FillPartials(sizeEntry);
}
}
HashState hashState = new HashState();
hashState.Initialize();
// Register a partial entry for the new data on every table before the target one,
// each covering the leading bytes that match that table's size.
// NOTE(review): the same hash state is continued across iterations; presumably
// Continue only consumes bytes that were not hashed yet - confirm against HashState.
for (int i = 0; i < index; i++)
{
ReadOnlySpan<byte> dataSlice = new ReadOnlySpan<byte>(data).Slice(0, _sizeTable[i].Size);
hashState.Continue(dataSlice);
_sizeTable[i].AddPartial(data, hashState.Finalize(dataSlice));
}
// Finish hashing the full data and store (or fetch) the full entry on its own table.
hashState.Continue(data);
return sizeEntry.GetOrAdd(data, hashState.Finalize(data), item);
}
/// <summary>
/// Performs binary search on a list of hash tables, each one with a fixed data size.
/// </summary>
/// <param name="entries">List of hash tables</param>
/// <param name="size">Size to search for</param>
/// <returns>
/// Index of the hash table with the given size if there is one, otherwise the last index that was probed
/// (0 for an empty list)
/// </returns>
private static int BinarySearch(List<SizeEntry> entries, int size)
{
    int probe = 0;
    int low = 0;
    int high = entries.Count - 1;

    while (low <= high)
    {
        probe = low + ((high - low) >> 1);

        int probedSize = entries[probe].Size;

        if (probedSize == size)
        {
            return probe;
        }

        if (probedSize > size)
        {
            high = probe - 1;
        }
        else
        {
            low = probe + 1;
        }
    }

    return probe;
}
/// <summary>
/// Tries to find an item on the table.
/// </summary>
/// <remarks>
/// The search bisects the list of per-size hash tables: a partial match moves the search towards
/// tables of higher size, no match moves it towards tables of lower size, and only a full match
/// ends it successfully.
/// </remarks>
/// <param name="dataAccessor">Data accessor</param>
/// <param name="item">Item, if found, otherwise the default value</param>
/// <param name="data">Data, if found, otherwise null</param>
/// <returns>True if the item was found on the table, false otherwise</returns>
public bool TryFindItem(IDataAccessor dataAccessor, out T item, out byte[] data)
{
    SmartDataAccessor sda = new SmartDataAccessor(dataAccessor);

    item = default;
    data = null;

    int left = 0;
    int right = _sizeTable.Count;

    while (left != right)
    {
        int index = left + ((right - left) >> 1);

        PartitionHashTable<T>.SearchResult result = _sizeTable[index].TryFindItem(ref sda, ref item, ref data);

        switch (result)
        {
            case PartitionHashTable<T>.SearchResult.FoundFull:
                return true;
            case PartitionHashTable<T>.SearchResult.NotFound:
                right = index;
                break;
            default: // PartitionHashTable<T>.SearchResult.FoundPartial
                left = index + 1;
                break;
        }
    }

    // A partial hit may have written to both outputs; make sure neither of them
    // leaks to the caller when the search fails.
    item = default;
    data = null;

    return false;
}
}
}

View file

@ -0,0 +1,96 @@
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Shader.HashTable
{
/// <summary>
/// Smart data accessor that can cache data and hashes to avoid reading and re-hashing the same memory regions.
/// </summary>
ref struct SmartDataAccessor
{
    private readonly IDataAccessor _dataAccessor;
    private ReadOnlySpan<byte> _data;
    private readonly SortedList<int, HashState> _cachedHashes;

    /// <summary>
    /// Creates a new smart data accessor, with nothing cached yet.
    /// </summary>
    /// <param name="dataAccessor">Data accessor</param>
    public SmartDataAccessor(IDataAccessor dataAccessor)
    {
        _dataAccessor = dataAccessor;
        _data = ReadOnlySpan<byte>.Empty;
        _cachedHashes = new SortedList<int, HashState>();
    }

    /// <summary>
    /// Gets a span of a given size.
    /// </summary>
    /// <remarks>
    /// The actual length of the span returned depends on the <see cref="IDataAccessor"/>
    /// and might be less than requested.
    /// </remarks>
    /// <param name="length">Size in bytes</param>
    /// <returns>Span with the requested size</returns>
    public ReadOnlySpan<byte> GetSpan(int length)
    {
        // Enough data is cached already, just hand out the requested prefix.
        if (_data.Length > length)
        {
            return _data.Slice(0, length);
        }

        // Not enough cached data, fetch the region again starting from offset 0.
        if (_data.Length < length)
        {
            _data = _dataAccessor.GetSpan(0, length);
        }

        return _data;
    }

    /// <summary>
    /// Gets a span of the requested size, and a hash of its data.
    /// </summary>
    /// <param name="length">Length of the span</param>
    /// <param name="hash">Hash of the span data, or 0 if less data than requested was available</param>
    /// <returns>Span of data</returns>
    public ReadOnlySpan<byte> GetSpanAndHash(int length, out uint hash)
    {
        ReadOnlySpan<byte> span = GetSpan(length);

        if (span.Length == length)
        {
            hash = CalcHashCached(span);
        }
        else
        {
            hash = 0;
        }

        return span;
    }

    /// <summary>
    /// Calculates the hash for a requested span.
    /// This will try to use a cached hash if the data was already accessed before, to avoid re-hashing.
    /// </summary>
    /// <param name="data">Data to be hashed</param>
    /// <returns>Hash of the data</returns>
    private uint CalcHashCached(ReadOnlySpan<byte> data)
    {
        // Keys are sorted in ascending order, so walking backwards finds the largest
        // cached size that is still smaller than the requested data.
        int cacheIndex = _cachedHashes.Count - 1;

        while (cacheIndex >= 0 && _cachedHashes.Keys[cacheIndex] >= data.Length)
        {
            cacheIndex--;
        }

        HashState state;

        if (cacheIndex >= 0)
        {
            state = _cachedHashes.Values[cacheIndex];
        }
        else
        {
            state = new HashState();
            state.Initialize();
        }

        state.Continue(data);

        // The state is stored under the length rounded down to a multiple of 8.
        // NOTE(review): presumably because the hash consumes data in 8 byte blocks - confirm against HashState.
        _cachedHashes[data.Length & ~7] = state;

        return state.Finalize(data);
    }
}
}

View file

@ -0,0 +1,36 @@
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Holds running totals of the resources used by a shader.
/// </summary>
class ResourceCounts
{
    /// <summary>
    /// Total of uniform buffers used by the shaders.
    /// Starts at 1, as the first binding is reserved for the support buffer.
    /// </summary>
    public int UniformBuffersCount = 1;

    /// <summary>
    /// Total of storage buffers used by the shaders.
    /// </summary>
    public int StorageBuffersCount;

    /// <summary>
    /// Total of textures used by the shaders.
    /// </summary>
    public int TexturesCount;

    /// <summary>
    /// Total of images used by the shaders.
    /// </summary>
    public int ImagesCount;

    /// <summary>
    /// Creates a new instance of the shader resource counts class.
    /// </summary>
    public ResourceCounts()
    {
    }
}
}

View file

@ -0,0 +1,64 @@
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Shader code addresses in memory for each shader stage.
/// </summary>
struct ShaderAddresses : IEquatable<ShaderAddresses>
{
#pragma warning disable CS0649
    public ulong VertexA;
    public ulong VertexB;
    public ulong TessControl;
    public ulong TessEvaluation;
    public ulong Geometry;
    public ulong Fragment;
#pragma warning restore CS0649

    /// <summary>
    /// Check if the addresses are equal.
    /// </summary>
    /// <param name="other">Object to compare with</param>
    /// <returns>True if <paramref name="other"/> is a <see cref="ShaderAddresses"/> with the same addresses, false otherwise</returns>
    public override bool Equals(object other)
    {
        if (other is not ShaderAddresses addresses)
        {
            return false;
        }

        return Equals(addresses);
    }

    /// <summary>
    /// Check if the addresses are equal.
    /// </summary>
    /// <param name="other">Shader addresses structure to compare with</param>
    /// <returns>True if the addresses of all stages are equal, false otherwise</returns>
    public bool Equals(ShaderAddresses other)
    {
        return (VertexA, VertexB, TessControl, TessEvaluation, Geometry, Fragment) ==
               (other.VertexA, other.VertexB, other.TessControl, other.TessEvaluation, other.Geometry, other.Fragment);
    }

    /// <summary>
    /// Computes hash code from the addresses.
    /// </summary>
    /// <returns>Hash code</returns>
    public override int GetHashCode()
        => HashCode.Combine(VertexA, VertexB, TessControl, TessEvaluation, Geometry, Fragment);

    /// <summary>
    /// Gets a view of the structure as a span of addresses.
    /// </summary>
    /// <returns>Span of addresses, starting at <see cref="VertexA"/>, with one element per stage address</returns>
    public Span<ulong> AsSpan()
    {
        // The structure only contains ulong fields, so its size divided by the size of one address gives the count.
        int addressCount = Unsafe.SizeOf<ShaderAddresses>() / sizeof(ulong);

        return MemoryMarshal.CreateSpan(ref VertexA, addressCount);
    }
}
}

View file

@ -0,0 +1,774 @@
using Ryujinx.Common.Configuration;
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.DiskCache;
using Ryujinx.Graphics.Shader;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Memory cache of shader code.
/// </summary>
class ShaderCache : IDisposable
{
/// <summary>
/// Default flags used on the shader translation process.
/// </summary>
public const TranslationFlags DefaultFlags = TranslationFlags.DebugMode;
/// <summary>
/// Pairs a cached shader stage with its translated program.
/// </summary>
private readonly struct TranslatedShader
{
    public readonly CachedShaderStage Shader;
    public readonly ShaderProgram Program;

    /// <summary>
    /// Creates a new translated shader pair.
    /// </summary>
    /// <param name="shader">Cached shader stage</param>
    /// <param name="program">Translated program</param>
    public TranslatedShader(CachedShaderStage shader, ShaderProgram program)
    {
        (Shader, Program) = (shader, program);
    }
}
/// <summary>
/// Groups the cached vertex A and vertex B stages with the single program translated for both.
/// </summary>
private readonly struct TranslatedShaderVertexPair
{
    public readonly CachedShaderStage VertexA;
    public readonly CachedShaderStage VertexB;
    public readonly ShaderProgram Program;

    /// <summary>
    /// Creates a new translated vertex shader pair.
    /// </summary>
    /// <param name="vertexA">Cached vertex A stage</param>
    /// <param name="vertexB">Cached vertex B stage</param>
    /// <param name="program">Translated program</param>
    public TranslatedShaderVertexPair(CachedShaderStage vertexA, CachedShaderStage vertexB, ShaderProgram program)
    {
        (VertexA, VertexB, Program) = (vertexA, vertexB, program);
    }
}
private readonly GpuContext _context;
private readonly ShaderDumper _dumper;
private readonly Dictionary<ulong, CachedShaderProgram> _cpPrograms;
private readonly Dictionary<ShaderAddresses, CachedShaderProgram> _gpPrograms;
/// <summary>
/// Program that is waiting to be written to the disk cache.
/// </summary>
private readonly struct ProgramToSave
{
    public readonly CachedShaderProgram CachedProgram;
    public readonly IProgram HostProgram;
    public readonly byte[] BinaryCode;

    /// <summary>
    /// Creates a new pending disk cache save.
    /// </summary>
    /// <param name="cachedProgram">Cached shader program</param>
    /// <param name="hostProgram">Host program</param>
    /// <param name="binaryCode">Binary code to save, or null to query it from the host program at save time</param>
    public ProgramToSave(CachedShaderProgram cachedProgram, IProgram hostProgram, byte[] binaryCode)
    {
        (CachedProgram, HostProgram, BinaryCode) = (cachedProgram, hostProgram, binaryCode);
    }
}
private Queue<ProgramToSave> _programsToSaveQueue;
private readonly ComputeShaderCacheHashTable _computeShaderCache;
private readonly ShaderCacheHashTable _graphicsShaderCache;
private readonly DiskCacheHostStorage _diskCacheHostStorage;
private readonly BackgroundDiskCacheWriter _cacheWriter;
/// <summary>
/// Event for signalling shader cache loading progress.
/// </summary>
public event Action<ShaderCacheState, int, int> ShaderCacheStateChanged;
/// <summary>
/// Creates a new instance of the shader cache.
/// </summary>
/// <remarks>
/// The background disk cache writer is only created when the disk cache storage reports itself as enabled.
/// </remarks>
/// <param name="context">GPU context that the shader cache belongs to</param>
public ShaderCache(GpuContext context)
{
    _context = context;
    _dumper = new ShaderDumper();

    // Lookup tables keyed by guest address(es).
    _cpPrograms = new Dictionary<ulong, CachedShaderProgram>();
    _gpPrograms = new Dictionary<ShaderAddresses, CachedShaderProgram>();

    _programsToSaveQueue = new Queue<ProgramToSave>();

    string diskCachePath = GetDiskCachePath();

    _computeShaderCache = new ComputeShaderCacheHashTable();
    _graphicsShaderCache = new ShaderCacheHashTable();
    _diskCacheHostStorage = new DiskCacheHostStorage(diskCachePath);

    if (!_diskCacheHostStorage.CacheEnabled)
    {
        return;
    }

    _cacheWriter = new BackgroundDiskCacheWriter(context, _diskCacheHostStorage);
}
/// <summary>
/// Gets the path where the disk cache for the current application is stored.
/// </summary>
/// <returns>Disk cache path, or null if the shader cache is disabled or there is no title ID</returns>
private static string GetDiskCachePath()
{
    if (!GraphicsConfig.EnableShaderCache || GraphicsConfig.TitleId == null)
    {
        return null;
    }

    return Path.Combine(AppDataManager.GamesDirPath, GraphicsConfig.TitleId, "cache", "shader");
}
/// <summary>
/// Processes the queue of shaders that must save their binaries to the disk cache.
/// </summary>
/// <remarks>
/// Programs are handled in queue order. Processing stops at the first program whose link is still
/// incomplete, so later programs are not saved before it.
/// </remarks>
public void ProcessShaderCacheQueue()
{
    // Check to see if the binaries for previously compiled shaders are ready, and save them out.
    while (_programsToSaveQueue.TryPeek(out ProgramToSave pending))
    {
        ProgramLinkStatus status = pending.HostProgram.CheckProgramLink(false);

        if (status == ProgramLinkStatus.Incomplete)
        {
            break;
        }

        // Programs that did not link successfully are dropped from the queue without being saved.
        if (status == ProgramLinkStatus.Success)
        {
            byte[] binary = pending.BinaryCode ?? pending.HostProgram.GetBinary();
            _cacheWriter.AddShader(pending.CachedProgram, binary);
        }

        _programsToSaveQueue.Dequeue();
    }
}
/// <summary>
/// Initialize the cache, loading the shaders from the disk cache if it is enabled.
/// </summary>
/// <param name="cancellationToken">Cancellation token to cancel the shader cache initialization process</param>
internal void Initialize(CancellationToken cancellationToken)
{
    if (!_diskCacheHostStorage.CacheEnabled)
    {
        return;
    }

    ParallelDiskCacheLoader loader = new ParallelDiskCacheLoader(
        _context,
        _graphicsShaderCache,
        _computeShaderCache,
        _diskCacheHostStorage,
        cancellationToken,
        ShaderCacheStateUpdate);

    loader.LoadShaders();

    int failedCount = loader.ErrorCount;

    if (failedCount == 0)
    {
        return;
    }

    Logger.Warning?.Print(LogClass.Gpu, $"Failed to load {failedCount} shaders from the disk cache.");
}
/// <summary>
/// Shader cache state update handler, forwards the update to the subscribers of <see cref="ShaderCacheStateChanged"/>.
/// </summary>
/// <param name="state">Current state of the shader cache load process</param>
/// <param name="current">Number of the current shader being processed</param>
/// <param name="total">Total number of shaders to process</param>
private void ShaderCacheStateUpdate(ShaderCacheState state, int current, int total)
    => ShaderCacheStateChanged?.Invoke(state, current, total);
/// <summary>
/// Gets a compute shader from the cache.
/// </summary>
/// <remarks>
/// This automatically translates, compiles and adds the code to the cache if not present.
/// </remarks>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute engine state</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>Compiled compute shader code</returns>
public CachedShaderProgram GetComputeShader(
    GpuChannel channel,
    GpuChannelPoolState poolState,
    GpuChannelComputeState computeState,
    ulong gpuVa)
{
    // First try the program last seen at this address, if it still matches the code and state.
    bool knownAddress = _cpPrograms.TryGetValue(gpuVa, out CachedShaderProgram cpShader);

    if (knownAddress && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa))
    {
        return cpShader;
    }

    // Then try the hash table; only translate and compile if that also misses.
    if (!_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode))
    {
        ShaderSpecializationState specState = new ShaderSpecializationState(ref computeState);
        GpuAccessorState accessorState = new GpuAccessorState(poolState, computeState, default, specState);
        GpuAccessor accessor = new GpuAccessor(_context, channel, accessorState);

        TranslatorContext translatorContext = DecodeComputeShader(accessor, _context.Capabilities.Api, gpuVa);
        TranslatedShader translated = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode);

        ShaderSource[] sources = new ShaderSource[] { CreateShaderSource(translated.Program) };
        IProgram hostProgram = _context.Renderer.CreateProgram(sources, new ShaderInfo(-1));

        cpShader = new CachedShaderProgram(hostProgram, specState, translated.Shader);

        _computeShaderCache.Add(cpShader);
        EnqueueProgramToSave(cpShader, hostProgram, sources);
    }

    // Remember the program for this address so the next lookup can take the fast path.
    _cpPrograms[gpuVa] = cpShader;

    return cpShader;
}
/// <summary>
/// Updates the shader pipeline state based on the current GPU state.
/// </summary>
/// <param name="state">Current GPU 3D engine state</param>
/// <param name="pipeline">Shader pipeline state to be updated</param>
/// <param name="graphicsState">Current graphics state</param>
/// <param name="channel">Current GPU channel</param>
private void UpdatePipelineInfo(
    ref ThreedClassState state,
    ref ProgramPipelineState pipeline,
    GpuChannelGraphicsState graphicsState,
    GpuChannel channel)
{
    channel.TextureManager.UpdateRenderTargets();

    var rtControl = state.RtControl;
    var msaaMode = state.RtMsaaMode;

    pipeline.SamplesCount = msaaMode.SamplesInX() * msaaMode.SamplesInY();

    int activeCount = rtControl.UnpackCount();

    for (int index = 0; index < Constants.TotalRenderTargets; index++)
    {
        int rtIndex = rtControl.UnpackPermutationIndex(index);
        var colorState = state.RtColorState[rtIndex];

        // A render target is only enabled if it is within the active count and has a format and size.
        bool enabled = index < activeCount && colorState.Format != 0 && colorState.WidthOrStride != 0;

        pipeline.AttachmentEnable[index] = enabled;
        pipeline.AttachmentFormats[index] = enabled
            ? colorState.Format.Convert().Format
            : Format.R8G8B8A8Unorm;
    }

    pipeline.DepthStencilEnable = state.RtDepthStencilEnable;

    if (pipeline.DepthStencilEnable)
    {
        pipeline.DepthStencilFormat = state.RtDepthStencilState.Format.Convert().Format;
    }
    else
    {
        pipeline.DepthStencilFormat = Format.D24UnormS8Uint;
    }

    pipeline.VertexBufferCount = Constants.TotalVertexBuffers;
    pipeline.Topology = graphicsState.Topology;
}
/// <summary>
/// Gets a graphics shader program from the shader cache.
/// This includes all the specified shader stages.
/// </summary>
/// <remarks>
/// This automatically translates, compiles and adds the code to the cache if not present.
/// The lookup is attempted first by shader addresses, then on the graphics shader hash table,
/// and only if both miss are the stages decoded, translated and compiled into a new host program.
/// </remarks>
/// <param name="state">GPU state</param>
/// <param name="pipeline">Pipeline state</param>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">3D engine state</param>
/// <param name="addresses">Addresses of the shaders for each stage</param>
/// <returns>Compiled graphics shader code</returns>
public CachedShaderProgram GetGraphicsShader(
ref ThreedClassState state,
ref ProgramPipelineState pipeline,
GpuChannel channel,
ref GpuChannelPoolState poolState,
ref GpuChannelGraphicsState graphicsState,
ShaderAddresses addresses)
{
// Fast path: a program was already seen at these addresses and still matches the current code and state.
if (_gpPrograms.TryGetValue(addresses, out var gpShaders) && IsShaderEqual(channel, ref poolState, ref graphicsState, gpShaders, addresses))
{
return gpShaders;
}
// Second chance: look the program up on the hash table, and remember it for these addresses on a hit.
if (_graphicsShaderCache.TryFind(channel, ref poolState, ref graphicsState, addresses, out gpShaders, out var cachedGuestCode))
{
_gpPrograms[addresses] = gpShaders;
return gpShaders;
}
TransformFeedbackDescriptor[] transformFeedbackDescriptors = GetTransformFeedbackDescriptors(ref state);
UpdatePipelineInfo(ref state, ref pipeline, graphicsState, channel);
ShaderSpecializationState specState = new ShaderSpecializationState(ref graphicsState, ref pipeline, transformFeedbackDescriptors);
GpuAccessorState gpuAccessorState = new GpuAccessorState(poolState, default, graphicsState, specState, transformFeedbackDescriptors);
ReadOnlySpan<ulong> addressesSpan = addresses.AsSpan();
// Slot 0 is used for vertex A, slots 1 and up hold the regular stages (stage index + 1).
TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1];
TranslatorContext nextStage = null;
TargetApi api = _context.Capabilities.Api;
// Stages are decoded from last to first, so that each stage can be linked to the stage that follows it.
for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--)
{
// The first address of the span is vertex A, which is handled together with stage 0 below.
ulong gpuVa = addressesSpan[stageIndex + 1];
if (gpuVa != 0)
{
GpuAccessor gpuAccessor = new GpuAccessor(_context, channel, gpuAccessorState, stageIndex);
TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, gpuVa);
if (nextStage != null)
{
currentStage.SetNextStage(nextStage);
}
if (stageIndex == 0 && addresses.VertexA != 0)
{
translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA);
}
translatorContexts[stageIndex + 1] = currentStage;
nextStage = currentStage;
}
}
if (!_context.Capabilities.SupportsGeometryShader)
{
TryRemoveGeometryStage(translatorContexts);
}
CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1];
List<ShaderSource> shaderSources = new List<ShaderSource>();
TranslatorContext previousStage = null;
for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++)
{
TranslatorContext currentStage = translatorContexts[stageIndex + 1];
if (currentStage != null)
{
ShaderProgram program;
if (stageIndex == 0 && translatorContexts[0] != null)
{
// Vertex A and vertex B are translated together, producing a single program for both.
TranslatedShaderVertexPair translatedShader = TranslateShader(
_dumper,
channel,
currentStage,
translatorContexts[0],
cachedGuestCode.VertexACode,
cachedGuestCode.VertexBCode);
shaders[0] = translatedShader.VertexA;
shaders[1] = translatedShader.VertexB;
program = translatedShader.Program;
}
else
{
byte[] code = cachedGuestCode.GetByIndex(stageIndex);
TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code);
shaders[stageIndex + 1] = translatedShader.Shader;
program = translatedShader.Program;
}
if (program != null)
{
shaderSources.Add(CreateShaderSource(program));
}
previousStage = currentStage;
}
else if (
previousStage != null &&
previousStage.LayerOutputWritten &&
stageIndex == 3 &&
!_context.Capabilities.SupportsLayerVertexTessellation)
{
// There is no stage at index 3 (slot 4, the slot TryRemoveGeometryStage treats as the geometry shader),
// but an earlier stage writes the layer output and the host reports no support for that:
// add a generated geometry passthrough shader instead.
shaderSources.Add(CreateShaderSource(previousStage.GenerateGeometryPassthrough()));
}
}
ShaderSource[] shaderSourcesArray = shaderSources.ToArray();
// Slot 5 matches the position of Fragment on ShaderAddresses; -1 is used when that slot is empty.
int fragmentOutputMap = shaders[5]?.Info.FragmentOutputMap ?? -1;
IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, new ShaderInfo(fragmentOutputMap, pipeline));
gpShaders = new CachedShaderProgram(hostProgram, specState, shaders);
_graphicsShaderCache.Add(gpShaders);
EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray);
_gpPrograms[addresses] = gpShaders;
return gpShaders;
}
/// <summary>
/// Tries to eliminate the geometry stage from the array of translator contexts.
/// </summary>
/// <remarks>
/// The stage is only removed if it is identified as a shader that does nothing but set the output layer.
/// </remarks>
/// <param name="translatorContexts">Array of translator contexts</param>
public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts)
{
    TranslatorContext geometryStage = translatorContexts[4];

    if (geometryStage == null)
    {
        return;
    }

    // We have a geometry shader, but geometry shaders are not supported.
    // Try to eliminate the geometry shader.
    ShaderProgramInfo info = geometryStage.Translate().Info;

    if (info.Identification != ShaderIdentification.GeometryLayerPassthrough)
    {
        return;
    }

    // We managed to identify that this geometry shader is only used to set the output Layer value,
    // we can set the Layer on the previous stage instead (usually the vertex stage) and eliminate it.
    for (int i = 3; i >= 1; i--)
    {
        TranslatorContext candidate = translatorContexts[i];

        if (candidate == null)
        {
            continue;
        }

        candidate.SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute);
        candidate.SetLastInVertexPipeline();

        break;
    }

    translatorContexts[4] = null;
}
/// <summary>
/// Creates a shader source for use with the backend from a translated shader program.
/// </summary>
/// <param name="program">Translated shader program</param>
/// <returns>Shader source</returns>
public static ShaderSource CreateShaderSource(ShaderProgram program)
{
    var code = program.Code;
    var binaryCode = program.BinaryCode;
    var bindings = GetBindings(program.Info);
    var stage = program.Info.Stage;

    return new ShaderSource(code, binaryCode, bindings, stage, program.Language);
}
/// <summary>
/// Puts a program on the queue of programs to be saved on the disk cache.
/// </summary>
/// <remarks>
/// This will not do anything if disk shader cache is disabled.
/// </remarks>
/// <param name="program">Cached shader program</param>
/// <param name="hostProgram">Host program</param>
/// <param name="sources">Source for each shader stage</param>
private void EnqueueProgramToSave(CachedShaderProgram program, IProgram hostProgram, ShaderSource[] sources)
{
    if (!_diskCacheHostStorage.CacheEnabled)
    {
        return;
    }

    // On Vulkan the binary is packed from the sources right away; for other APIs it is
    // left as null and taken from the host program when the queue is processed.
    byte[] packedBinary = null;

    if (_context.Capabilities.Api == TargetApi.Vulkan)
    {
        packedBinary = ShaderBinarySerializer.Pack(sources);
    }

    _programsToSaveQueue.Enqueue(new ProgramToSave(program, hostProgram, packedBinary));
}
/// <summary>
/// Builds the transform feedback descriptors from the current GPU state.
/// </summary>
/// <param name="state">Current GPU state</param>
/// <returns>
/// One descriptor per transform feedback buffer (<see cref="Constants.TotalTransformFeedbackBuffers"/> entries),
/// or null if transform feedback is disabled
/// </returns>
private static TransformFeedbackDescriptor[] GetTransformFeedbackDescriptors(ref ThreedClassState state)
{
    if (!state.TfEnable)
    {
        return null;
    }

    var descriptors = new TransformFeedbackDescriptor[Constants.TotalTransformFeedbackBuffers];

    for (int bufferIndex = 0; bufferIndex < descriptors.Length; bufferIndex++)
    {
        var bufferState = state.TfState[bufferIndex];

        // The varying locations are handed over by reference, straight from the GPU state.
        descriptors[bufferIndex] = new TransformFeedbackDescriptor(
            bufferState.BufferIndex,
            bufferState.Stride,
            bufferState.VaryingsCount,
            ref state.TfVaryingLocations[bufferIndex]);
    }

    return descriptors;
}
/// <summary>
/// Checks if compute shader code in memory is equal to the cached shader.
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
/// <param name="cpShader">Cached compute shader</param>
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
/// <returns>
/// True if the code in memory matches the cached code and the cached specialization state
/// matches the current compute state, false otherwise
/// </returns>
private static bool IsShaderEqual(
    GpuChannel channel,
    GpuChannelPoolState poolState,
    GpuChannelComputeState computeState,
    CachedShaderProgram cpShader,
    ulong gpuVa)
{
    // Compute programs keep their single stage at index 0.
    bool codeMatches = IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa);

    // The specialization state is only consulted once the code itself is known to match.
    return codeMatches && cpShader.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true);
}
/// <summary>
/// Checks if graphics shader code from all stages in memory are equal to the cached shaders.
/// </summary>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="graphicsState">GPU channel graphics state to verify shader compatibility</param>
/// <param name="gpShaders">Cached graphics shaders</param>
/// <param name="addresses">GPU virtual addresses of all enabled shader stages</param>
/// <returns>
/// True if the code of every cached stage matches the code in memory and the cached specialization
/// state matches the current graphics state, false otherwise
/// </returns>
private static bool IsShaderEqual(
    GpuChannel channel,
    ref GpuChannelPoolState poolState,
    ref GpuChannelGraphicsState graphicsState,
    CachedShaderProgram gpShaders,
    ShaderAddresses addresses)
{
    ReadOnlySpan<ulong> stageAddresses = addresses.AsSpan();
    var cachedStages = gpShaders.Shaders;

    // Any stage whose code differs from memory makes the whole program a mismatch.
    for (int index = 0; index < cachedStages.Length; index++)
    {
        bool stageMatches = IsShaderEqual(channel.MemoryManager, cachedStages[index], stageAddresses[index]);
        if (!stageMatches)
        {
            return false;
        }
    }

    // Draw parameter usage is read from the stage at index 1, when that stage is present.
    bool usesDrawParameters = cachedStages[1]?.Info.UsesDrawParameters ?? false;

    return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true);
}
/// <summary>
/// Checks if the code of the specified cached shader is equal to the code in memory.
/// </summary>
/// <param name="memoryManager">Memory manager used to access the GPU memory where the shader is located</param>
/// <param name="shader">Cached shader to compare with, a null shader is always considered equal</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>True if the code is equal (or there is no cached shader to compare), false otherwise</returns>
private static bool IsShaderEqual(MemoryManager memoryManager, CachedShaderStage shader, ulong gpuVa)
{
    // Only as many bytes as the cached code has are read from memory and compared.
    return shader == null || memoryManager.GetSpan(gpuVa, shader.Code.Length).SequenceEqual(shader.Code);
}
/// <summary>
/// Decodes binary Maxwell compute shader code into a translator context.
/// </summary>
/// <param name="gpuAccessor">GPU state accessor</param>
/// <param name="api">Graphics API that will be used with the shader</param>
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
/// <returns>The generated translator context</returns>
public static TranslatorContext DecodeComputeShader(IGpuAccessor gpuAccessor, TargetApi api, ulong gpuVa)
{
    // Compute shaders always use the default flags plus the compute flag.
    TranslationFlags computeFlags = DefaultFlags | TranslationFlags.Compute;
    TranslationOptions translationOptions = CreateTranslationOptions(api, computeFlags);

    return Translator.CreateContext(gpuVa, gpuAccessor, translationOptions);
}
/// <summary>
/// Decodes binary Maxwell graphics shader code into a translator context.
/// </summary>
/// <remarks>
/// This only creates the context for a single stage. Combining the "Vertex A" and "Vertex B" stages
/// is not done here, it happens when the contexts are translated.
/// </remarks>
/// <param name="gpuAccessor">GPU state accessor</param>
/// <param name="api">Graphics API that will be used with the shader</param>
/// <param name="flags">Flags that control shader translation</param>
/// <param name="gpuVa">GPU virtual address of the shader code</param>
/// <returns>The generated translator context</returns>
public static TranslatorContext DecodeGraphicsShader(IGpuAccessor gpuAccessor, TargetApi api, TranslationFlags flags, ulong gpuVa)
{
    TranslationOptions translationOptions = CreateTranslationOptions(api, flags);

    return Translator.CreateContext(gpuVa, gpuAccessor, translationOptions);
}
/// <summary>
/// Translates a vertex stage together with its "Vertex A" counterpart into something that the host API accepts.
/// </summary>
/// <param name="dumper">Optional shader code dumper</param>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="currentStage">Translator context of the stage to be translated</param>
/// <param name="vertexA">Translator context of the shader that should be combined, it is read unconditionally</param>
/// <param name="codeA">Optional Maxwell binary code of the Vertex A shader, read from memory when null</param>
/// <param name="codeB">Optional Maxwell binary code of the Vertex B or current stage shader, read from memory when null</param>
/// <returns>Both cached vertex stages and the combined translated program</returns>
private static TranslatedShaderVertexPair TranslateShader(
    ShaderDumper dumper,
    GpuChannel channel,
    TranslatorContext currentStage,
    TranslatorContext vertexA,
    byte[] codeA,
    byte[] codeB)
{
    // Both stages take their constant buffer 1 data from the same address (stage 0, slot 1).
    ulong cb1Address = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1);

    var gpuMemory = channel.MemoryManager;

    // Guest code that was not supplied by the caller is read back from GPU memory.
    if (codeA == null)
    {
        codeA = gpuMemory.GetSpan(vertexA.Address, vertexA.Size).ToArray();
    }

    if (codeB == null)
    {
        codeB = gpuMemory.GetSpan(currentStage.Address, currentStage.Size).ToArray();
    }

    byte[] cb1DataA = gpuMemory.Physical.GetSpan(cb1Address, vertexA.Cb1DataSize).ToArray();
    byte[] cb1DataB = gpuMemory.Physical.GetSpan(cb1Address, currentStage.Cb1DataSize).ToArray();

    // Vertex A is dumped before the current stage; without a dumper both paths stay at their default value.
    ShaderDumpPaths dumpPathsA = dumper?.Dump(codeA, compute: false) ?? default;
    ShaderDumpPaths dumpPathsB = dumper?.Dump(codeB, compute: false) ?? default;

    ShaderProgram combinedProgram = currentStage.Translate(vertexA);

    // The current stage paths are prepended first, then the Vertex A paths.
    dumpPathsB.Prepend(combinedProgram);
    dumpPathsA.Prepend(combinedProgram);

    // Only the second stage carries the program info, the Vertex A stage is stored with null info.
    return new TranslatedShaderVertexPair(
        new CachedShaderStage(null, codeA, cb1DataA),
        new CachedShaderStage(combinedProgram.Info, codeB, cb1DataB),
        combinedProgram);
}
/// <summary>
/// Translates a single previously decoded stage into something that the host API accepts.
/// </summary>
/// <param name="dumper">Optional shader code dumper</param>
/// <param name="channel">GPU channel using the shader</param>
/// <param name="context">Translator context of the stage to be translated</param>
/// <param name="code">Optional Maxwell binary code of the current stage shader, read from memory when null</param>
/// <returns>Cached stage and translated program of the shader</returns>
private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code)
{
    var gpuMemory = channel.MemoryManager;

    // Constant buffer 1 lives at a different address for compute and for each graphics stage.
    ulong cb1Address;
    if (context.Stage == ShaderStage.Compute)
    {
        cb1Address = channel.BufferManager.GetComputeUniformBufferAddress(1);
    }
    else
    {
        cb1Address = channel.BufferManager.GetGraphicsUniformBufferAddress(StageToStageIndex(context.Stage), 1);
    }

    byte[] cb1Data = gpuMemory.Physical.GetSpan(cb1Address, context.Cb1DataSize).ToArray();

    // Guest code that was not supplied by the caller is read back from GPU memory.
    if (code == null)
    {
        code = gpuMemory.GetSpan(context.Address, context.Size).ToArray();
    }

    ShaderDumpPaths dumpPaths = default;
    if (dumper != null)
    {
        dumpPaths = dumper.Dump(code, context.Stage == ShaderStage.Compute);
    }

    ShaderProgram translated = context.Translate();
    dumpPaths.Prepend(translated);

    CachedShaderStage cachedStage = new CachedShaderStage(translated.Info, code, cb1Data);

    return new TranslatedShader(cachedStage, translated);
}
/// <summary>
/// Maps a <see cref="ShaderStage"/> to its graphics stage index.
/// </summary>
/// <param name="stage">Stage to get the index from</param>
/// <returns>Stage index, any stage that is not listed explicitly maps to index 0</returns>
private static int StageToStageIndex(ShaderStage stage)
{
    switch (stage)
    {
        case ShaderStage.TessellationControl:
            return 1;
        case ShaderStage.TessellationEvaluation:
            return 2;
        case ShaderStage.Geometry:
            return 3;
        case ShaderStage.Fragment:
            return 4;
        default:
            return 0;
    }
}
/// <summary>
/// Collects the binding numbers of every resource used by a shader program.
/// </summary>
/// <param name="info">Shader program information to get the information from</param>
/// <returns>Shader bindings for the constant buffers, storage buffers, textures and images</returns>
public static ShaderBindings GetBindings(ShaderProgramInfo info)
{
    return new ShaderBindings(
        info.CBuffers.Select(buffer => buffer.Binding).ToArray(),
        info.SBuffers.Select(buffer => buffer.Binding).ToArray(),
        info.Textures.Select(texture => texture.Binding).ToArray(),
        info.Images.Select(image => image.Binding).ToArray());
}
/// <summary>
/// Creates shader translation options with the requested graphics API and flags.
/// The shader language is chosen based on the current configuration and graphics API.
/// </summary>
/// <param name="api">Target graphics API</param>
/// <param name="flags">Translation flags</param>
/// <returns>Translation options</returns>
private static TranslationOptions CreateTranslationOptions(TargetApi api, TranslationFlags flags)
{
    // SPIR-V is only produced for Vulkan, and only when enabled on the configuration. Everything else gets GLSL.
    bool useSpirv = GraphicsConfig.EnableSpirvCompilationOnVulkan && api == TargetApi.Vulkan;
    TargetLanguage language = useSpirv ? TargetLanguage.Spirv : TargetLanguage.Glsl;

    return new TranslationOptions(language, api, flags);
}
/// <summary>
/// Disposes the shader cache: every cached graphics and compute program is disposed,
/// followed by the cache writer, if there is one.
/// It's an error to use the shader cache after disposal.
/// </summary>
public void Dispose()
{
// Dispose all programs held by the graphics shader cache.
foreach (CachedShaderProgram program in _graphicsShaderCache.GetPrograms())
{
program.Dispose();
}
// Dispose all programs held by the compute shader cache.
foreach (CachedShaderProgram program in _computeShaderCache.GetPrograms())
{
program.Dispose();
}
// The cache writer may be null, in which case there is nothing to dispose.
_cacheWriter?.Dispose();
}
}
}

View file

@ -0,0 +1,282 @@
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.HashTable;
using Ryujinx.Graphics.Shader;
using System;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Holds already cached code for a guest shader, one byte array per graphics stage.
/// </summary>
struct CachedGraphicsGuestCode
{
    public byte[] VertexACode;
    public byte[] VertexBCode;
    public byte[] TessControlCode;
    public byte[] TessEvaluationCode;
    public byte[] GeometryCode;
    public byte[] FragmentCode;

    /// <summary>
    /// Gets the guest code of a shader stage by its index.
    /// </summary>
    /// <remarks>
    /// Any index other than 1 to 4 yields the Vertex B code. The Vertex A code is never returned here.
    /// </remarks>
    /// <param name="stageIndex">Index of the shader stage</param>
    /// <returns>Guest code, or null if not present</returns>
    public byte[] GetByIndex(int stageIndex)
    {
        switch (stageIndex)
        {
            case 1:
                return TessControlCode;
            case 2:
                return TessEvaluationCode;
            case 3:
                return GeometryCode;
            case 4:
                return FragmentCode;
            default:
                return VertexBCode;
        }
    }
}
/// <summary>
/// Graphics shader cache hash table.
/// </summary>
class ShaderCacheHashTable
{
/// <summary>
/// Shader ID cache, associates guest code with an integer ID.
/// </summary>
private struct IdCache
{
    private PartitionedHashTable<int> _cache;
    private int _id;

    /// <summary>
    /// Initializes the state with an empty table and the ID counter at zero.
    /// </summary>
    public void Initialize()
    {
        _id = 0;
        _cache = new PartitionedHashTable<int>();
    }

    /// <summary>
    /// Adds guest code to the cache.
    /// </summary>
    /// <remarks>
    /// If the code was already cached, it will just return the existing ID.
    /// </remarks>
    /// <param name="code">Code to add</param>
    /// <returns>Unique ID for the guest code</returns>
    public int Add(byte[] code)
    {
        // Tentatively take the next ID. The first ID handed out is 1.
        _id++;

        int resolvedId = _cache.GetOrAdd(code, _id);

        if (resolvedId != _id)
        {
            // The table returned a different ID than the one offered, so give the tentative ID back.
            _id--;
        }

        return resolvedId;
    }

    /// <summary>
    /// Tries to find cached guest code.
    /// </summary>
    /// <param name="dataAccessor">Code accessor used to read guest code to find a match on the hash table</param>
    /// <param name="id">ID of the guest code, if found</param>
    /// <param name="data">Cached guest code, if found</param>
    /// <returns>True if found, false otherwise</returns>
    public bool TryFind(IDataAccessor dataAccessor, out int id, out byte[] data)
    {
        bool wasFound = _cache.TryFindItem(dataAccessor, out id, out data);

        return wasFound;
    }
}
/// <summary>
/// Guest code IDs of the guest shaders that, when combined, form a single host program.
/// Two tables are equal when all six IDs are equal.
/// </summary>
private struct IdTable : IEquatable<IdTable>
{
    public int VertexAId;
    public int VertexBId;
    public int TessControlId;
    public int TessEvaluationId;
    public int GeometryId;
    public int FragmentId;

    public override bool Equals(object obj) => obj is IdTable other && Equals(other);

    public bool Equals(IdTable other)
    {
        if (VertexAId != other.VertexAId || VertexBId != other.VertexBId)
        {
            return false;
        }

        if (TessControlId != other.TessControlId || TessEvaluationId != other.TessEvaluationId)
        {
            return false;
        }

        return GeometryId == other.GeometryId && FragmentId == other.FragmentId;
    }

    public override int GetHashCode() =>
        HashCode.Combine(VertexAId, VertexBId, TessControlId, TessEvaluationId, GeometryId, FragmentId);
}
private IdCache _vertexACache;
private IdCache _vertexBCache;
private IdCache _tessControlCache;
private IdCache _tessEvaluationCache;
private IdCache _geometryCache;
private IdCache _fragmentCache;
private readonly Dictionary<IdTable, ShaderSpecializationList> _shaderPrograms;
/// <summary>
/// Creates a new, empty graphics shader cache hash table.
/// </summary>
public ShaderCacheHashTable()
{
    _shaderPrograms = new Dictionary<IdTable, ShaderSpecializationList>();

    // The ID caches are structs, so each one has to be initialized explicitly before use.
    _vertexACache.Initialize();
    _vertexBCache.Initialize();
    _tessControlCache.Initialize();
    _tessEvaluationCache.Initialize();
    _geometryCache.Initialize();
    _fragmentCache.Initialize();
}
/// <summary>
/// Adds a program to the cache.
/// </summary>
/// <remarks>
/// The guest code of each stage is registered on the ID cache of that stage. The resulting IDs form
/// the key under which the program is stored, next to other programs sharing the same guest code.
/// </remarks>
/// <param name="program">Program to be added</param>
public void Add(CachedShaderProgram program)
{
    IdTable ids = new IdTable();

    foreach (var stage in program.Shaders)
    {
        if (stage == null)
        {
            continue;
        }

        if (stage.Info == null)
        {
            // A stage without program info is registered as the Vertex A shader.
            ids.VertexAId = _vertexACache.Add(stage.Code);
            continue;
        }

        // Stages other than the five below do not contribute to the key.
        switch (stage.Info.Stage)
        {
            case ShaderStage.Vertex:
                ids.VertexBId = _vertexBCache.Add(stage.Code);
                break;
            case ShaderStage.TessellationControl:
                ids.TessControlId = _tessControlCache.Add(stage.Code);
                break;
            case ShaderStage.TessellationEvaluation:
                ids.TessEvaluationId = _tessEvaluationCache.Add(stage.Code);
                break;
            case ShaderStage.Geometry:
                ids.GeometryId = _geometryCache.Add(stage.Code);
                break;
            case ShaderStage.Fragment:
                ids.FragmentId = _fragmentCache.Add(stage.Code);
                break;
        }
    }

    if (!_shaderPrograms.TryGetValue(ids, out ShaderSpecializationList variants))
    {
        // First program with this combination of guest code.
        variants = new ShaderSpecializationList();
        _shaderPrograms.Add(ids, variants);
    }

    variants.Add(program);
}
/// <summary>
/// Tries to find a cached program.
/// </summary>
/// <remarks>
/// Even if false is returned, <paramref name="guestCode"/> might still contain cached guest code.
/// This can be used to avoid additional allocations for guest code that was already cached.
/// </remarks>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">Graphics state</param>
/// <param name="addresses">Guest addresses of the shaders to find</param>
/// <param name="program">Cached host program for the given state, if found</param>
/// <param name="guestCode">Cached guest code, if any found</param>
/// <returns>True if a cached host program was found, false otherwise</returns>
public bool TryFind(
    GpuChannel channel,
    ref GpuChannelPoolState poolState,
    ref GpuChannelGraphicsState graphicsState,
    ShaderAddresses addresses,
    out CachedShaderProgram program,
    out CachedGraphicsGuestCode guestCode)
{
    var gpuMemory = channel.MemoryManager;

    IdTable ids = new IdTable();
    guestCode = new CachedGraphicsGuestCode();
    program = null;

    // Every stage is looked up, even after a miss, so that guestCode gets filled with whatever is cached.
    bool hasVertexA = TryGetId(_vertexACache, gpuMemory, addresses.VertexA, out ids.VertexAId, out guestCode.VertexACode);
    bool hasVertexB = TryGetId(_vertexBCache, gpuMemory, addresses.VertexB, out ids.VertexBId, out guestCode.VertexBCode);
    bool hasTessControl = TryGetId(_tessControlCache, gpuMemory, addresses.TessControl, out ids.TessControlId, out guestCode.TessControlCode);
    bool hasTessEvaluation = TryGetId(_tessEvaluationCache, gpuMemory, addresses.TessEvaluation, out ids.TessEvaluationId, out guestCode.TessEvaluationCode);
    bool hasGeometry = TryGetId(_geometryCache, gpuMemory, addresses.Geometry, out ids.GeometryId, out guestCode.GeometryCode);
    bool hasFragment = TryGetId(_fragmentCache, gpuMemory, addresses.Fragment, out ids.FragmentId, out guestCode.FragmentCode);

    bool allStagesCached = hasVertexA & hasVertexB & hasTessControl & hasTessEvaluation & hasGeometry & hasFragment;

    if (!allStagesCached || !_shaderPrograms.TryGetValue(ids, out ShaderSpecializationList variants))
    {
        return false;
    }

    // The guest code is known, now a program compiled for a compatible state is needed.
    return variants.TryFindForGraphics(channel, ref poolState, ref graphicsState, out program);
}
/// <summary>
/// Tries to get the ID of a single cached shader stage.
/// </summary>
/// <remarks>
/// A base address of zero is reported as found, with ID 0 and no data.
/// </remarks>
/// <param name="idCache">ID cache of the stage</param>
/// <param name="memoryManager">GPU memory manager</param>
/// <param name="baseAddress">Base address of the shader</param>
/// <param name="id">ID, if found</param>
/// <param name="data">Cached guest code, if found</param>
/// <returns>True if a cached shader is found, false otherwise</returns>
private static bool TryGetId(IdCache idCache, MemoryManager memoryManager, ulong baseAddress, out int id, out byte[] data)
{
    if (baseAddress != 0)
    {
        ShaderCodeAccessor accessor = new ShaderCodeAccessor(memoryManager, baseAddress);

        return idCache.TryFind(accessor, out id, out data);
    }

    data = null;
    id = 0;

    return true;
}
/// <summary>
/// Enumerates all programs that have been added to the table, one specialization list at a time.
/// </summary>
/// <returns>Programs added to the table</returns>
public IEnumerable<CachedShaderProgram> GetPrograms()
{
    foreach (ShaderSpecializationList variants in _shaderPrograms.Values)
    {
        foreach (CachedShaderProgram cachedProgram in variants)
        {
            yield return cachedProgram;
        }
    }
}
}
}

View file

@ -0,0 +1,13 @@
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Shader cache loading states.
/// </summary>
public enum ShaderCacheState
{
    /// <summary>
    /// Shader cache started loading.
    /// </summary>
    Start,

    /// <summary>
    /// Shader cache is loading.
    /// </summary>
    Loading,

    /// <summary>
    /// Shader cache finished loading.
    /// </summary>
    Loaded
}
}

View file

@ -0,0 +1,32 @@
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.HashTable;
using System;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Shader code accessor, reads shader code relative to a fixed base address.
/// </summary>
readonly struct ShaderCodeAccessor : IDataAccessor
{
    private readonly MemoryManager _memoryManager;
    private readonly ulong _baseAddress;

    /// <summary>
    /// Creates a new shader code accessor.
    /// </summary>
    /// <param name="memoryManager">Memory manager used to access the shader code</param>
    /// <param name="baseAddress">Base address of the shader in memory</param>
    public ShaderCodeAccessor(MemoryManager memoryManager, ulong baseAddress)
    {
        _baseAddress = baseAddress;
        _memoryManager = memoryManager;
    }

    /// <inheritdoc/>
    public ReadOnlySpan<byte> GetSpan(int offset, int length)
    {
        // The offset is relative to the base address of the shader.
        ulong address = _baseAddress + (ulong)offset;

        return _memoryManager.GetSpanMapped(address, length);
    }
}
}

View file

@ -0,0 +1,49 @@
using Ryujinx.Graphics.Shader;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Paths where shader code was dumped on disk.
/// </summary>
readonly struct ShaderDumpPaths
{
    /// <summary>
    /// Path where the full shader code with header was dumped, or null if not dumped.
    /// </summary>
    public string FullPath { get; }

    /// <summary>
    /// Path where the shader code without header was dumped, or null if not dumped.
    /// </summary>
    public string CodePath { get; }

    /// <summary>
    /// True if both paths are set, false otherwise.
    /// </summary>
    public bool HasPath => !(FullPath == null || CodePath == null);

    /// <summary>
    /// Creates a new shader dumps path structure.
    /// </summary>
    /// <param name="fullPath">Path where the full shader code with header was dumped, or null if not dumped</param>
    /// <param name="codePath">Path where the shader code without header was dumped, or null if not dumped</param>
    public ShaderDumpPaths(string fullPath, string codePath)
    {
        CodePath = codePath;
        FullPath = fullPath;
    }

    /// <summary>
    /// Prepends the shader paths on the program source, as comments.
    /// Nothing is prepended when the shader was not dumped.
    /// </summary>
    /// <param name="program">Program to prepend into</param>
    public void Prepend(ShaderProgram program)
    {
        if (!HasPath)
        {
            return;
        }

        // The code path is prepended first, the full path is prepended after it.
        program.Prepend("// " + CodePath);
        program.Prepend("// " + FullPath);
    }
}
}

View file

@ -0,0 +1,129 @@
using System.IO;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Shader dumper, writes binary shader code to disk.
/// </summary>
class ShaderDumper
{
    /// <summary>
    /// Number of leading bytes of a graphics shader binary that are treated as its header.
    /// Compute shader binaries are dumped without any header.
    /// </summary>
    private const int GraphicsHeaderSize = 0x50;

    /// <summary>
    /// Size, in bytes, that the header-less code file is padded to, to meet nvdisasm requirements.
    /// </summary>
    private const int CodeAlignment = 0x20;

    private string _runtimeDir;
    private string _dumpPath;

    /// <summary>
    /// Current index of the shader dump binary file.
    /// This is incremented after each save, in order to give unique names to the files.
    /// </summary>
    public int CurrentDumpIndex { get; private set; }

    /// <summary>
    /// Creates a new instance of the shader dumper.
    /// </summary>
    public ShaderDumper()
    {
        CurrentDumpIndex = 1;
    }

    /// <summary>
    /// Dumps shader code to disk.
    /// </summary>
    /// <remarks>
    /// Two files are written: one with the complete binary, and one with the header removed and
    /// zero padded to a multiple of 0x20 bytes. Nothing is written when no dump path is configured.
    /// </remarks>
    /// <param name="code">Code to be dumped</param>
    /// <param name="compute">True for compute shader code, false for graphics shader code</param>
    /// <returns>Paths where the shader code was dumped, or the default value if dumping is disabled</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="code"/> is null while dumping is enabled</exception>
    public ShaderDumpPaths Dump(byte[] code, bool compute)
    {
        _dumpPath = GraphicsConfig.ShadersDumpPath;

        if (string.IsNullOrWhiteSpace(_dumpPath))
        {
            return default;
        }

        if (code == null)
        {
            throw new System.ArgumentNullException(nameof(code));
        }

        string fileName = "Shader" + CurrentDumpIndex.ToString("d4") + ".bin";
        string fullPath = Path.Combine(FullDir(), fileName);
        string codePath = Path.Combine(CodeDir(), fileName);

        CurrentDumpIndex++;

        // Clamp the header size so that a binary shorter than the header still dumps
        // (complete file gets everything, code file ends up empty) instead of throwing.
        int headerSize = compute ? 0 : System.Math.Min(GraphicsHeaderSize, code.Length);
        int codeSize = code.Length - headerSize;

        using FileStream fullFile = File.Create(fullPath);
        using FileStream codeFile = File.Create(codePath);

        fullFile.Write(code, 0, code.Length);
        codeFile.Write(code, headerSize, codeSize);

        // Align to meet nvdisasm requirements. The padding is computed in bytes, so it is
        // correct (and finite) even when the code size is not a multiple of 4.
        int padding = (CodeAlignment - (codeSize % CodeAlignment)) % CodeAlignment;

        if (padding != 0)
        {
            codeFile.Write(new byte[padding], 0, padding);
        }

        return new ShaderDumpPaths(fullPath, codePath);
    }

    /// <summary>
    /// Returns the output directory for shader code with header.
    /// </summary>
    /// <returns>Directory path</returns>
    private string FullDir()
    {
        return CreateAndReturn(Path.Combine(DumpDir(), "Full"));
    }

    /// <summary>
    /// Returns the output directory for shader code without header.
    /// </summary>
    /// <returns>Directory path</returns>
    private string CodeDir()
    {
        return CreateAndReturn(Path.Combine(DumpDir(), "Code"));
    }

    /// <summary>
    /// Returns the full output directory for the current shader dump.
    /// </summary>
    /// <remarks>
    /// The directory is picked once per dumper instance: the first "DumpsNN" directory that
    /// does not exist yet under the dump path is created and then reused for all later dumps.
    /// </remarks>
    /// <returns>Directory path</returns>
    private string DumpDir()
    {
        if (string.IsNullOrEmpty(_runtimeDir))
        {
            int index = 1;

            do
            {
                _runtimeDir = Path.Combine(_dumpPath, "Dumps" + index.ToString("d2"));

                index++;
            }
            while (Directory.Exists(_runtimeDir));

            Directory.CreateDirectory(_runtimeDir);
        }

        return _runtimeDir;
    }

    /// <summary>
    /// Creates a new specified directory if needed.
    /// </summary>
    /// <param name="dir">The directory to create</param>
    /// <returns>The same directory passed to the method</returns>
    private static string CreateAndReturn(string dir)
    {
        Directory.CreateDirectory(dir);

        return dir;
    }
}
}

View file

@ -0,0 +1,84 @@
using System.Collections;
using System.Collections.Generic;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// List of cached shader programs that differ only by specialization state.
/// </summary>
class ShaderSpecializationList : IEnumerable<CachedShaderProgram>
{
    private readonly List<CachedShaderProgram> _entries = new List<CachedShaderProgram>();

    /// <summary>
    /// Adds a program to the list.
    /// </summary>
    /// <param name="program">Program to be added</param>
    public void Add(CachedShaderProgram program) => _entries.Add(program);

    /// <summary>
    /// Tries to find an existing 3D program on the cache.
    /// </summary>
    /// <remarks>
    /// Entries are checked in the order they were added, and the first compatible one is returned.
    /// </remarks>
    /// <param name="channel">GPU channel</param>
    /// <param name="poolState">Texture pool state</param>
    /// <param name="graphicsState">Graphics state</param>
    /// <param name="program">Cached program, if found</param>
    /// <returns>True if a compatible program is found, false otherwise</returns>
    public bool TryFindForGraphics(
        GpuChannel channel,
        ref GpuChannelPoolState poolState,
        ref GpuChannelGraphicsState graphicsState,
        out CachedShaderProgram program)
    {
        program = default;

        foreach (CachedShaderProgram candidate in _entries)
        {
            // Draw parameter usage is read from the stage at index 1, when that stage is present.
            bool usesDrawParameters = candidate.Shaders[1]?.Info.UsesDrawParameters ?? false;

            bool isCompatible = candidate.SpecializationState.MatchesGraphics(
                channel,
                ref poolState,
                ref graphicsState,
                usesDrawParameters,
                true);

            if (!isCompatible)
            {
                continue;
            }

            program = candidate;

            return true;
        }

        return false;
    }

    /// <summary>
    /// Tries to find an existing compute program on the cache.
    /// </summary>
    /// <remarks>
    /// Entries are checked in the order they were added, and the first compatible one is returned.
    /// </remarks>
    /// <param name="channel">GPU channel</param>
    /// <param name="poolState">Texture pool state</param>
    /// <param name="computeState">Compute state</param>
    /// <param name="program">Cached program, if found</param>
    /// <returns>True if a compatible program is found, false otherwise</returns>
    public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
    {
        program = default;

        foreach (CachedShaderProgram candidate in _entries)
        {
            if (!candidate.SpecializationState.MatchesCompute(channel, ref poolState, computeState, true))
            {
                continue;
            }

            program = candidate;

            return true;
        }

        return false;
    }

    public IEnumerator<CachedShaderProgram> GetEnumerator() => _entries.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
}

View file

@ -0,0 +1,874 @@
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Gpu.Shader.DiskCache;
using Ryujinx.Graphics.Shader;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader
{
class ShaderSpecializationState
{
private const uint ComsMagic = (byte)'C' | ((byte)'O' << 8) | ((byte)'M' << 16) | ((byte)'S' << 24);
private const uint GfxsMagic = (byte)'G' | ((byte)'F' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24);
private const uint TfbdMagic = (byte)'T' | ((byte)'F' << 8) | ((byte)'B' << 16) | ((byte)'D' << 24);
private const uint TexkMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'K' << 24);
private const uint TexsMagic = (byte)'T' | ((byte)'E' << 8) | ((byte)'X' << 16) | ((byte)'S' << 24);
private const uint PgpsMagic = (byte)'P' | ((byte)'G' << 8) | ((byte)'P' << 16) | ((byte)'S' << 24);
/// <summary>
/// Flags indicating GPU state that is used by the shader.
/// </summary>
[Flags]
private enum QueriedStateFlags
{
EarlyZForce = 1 << 0,
PrimitiveTopology = 1 << 1,
TessellationMode = 1 << 2,
TransformFeedback = 1 << 3
}
private QueriedStateFlags _queriedState;
private bool _compute;
private byte _constantBufferUsePerStage;
/// <summary>
/// Compute engine state.
/// </summary>
public GpuChannelComputeState ComputeState;
/// <summary>
/// 3D engine state.
/// </summary>
public GpuChannelGraphicsState GraphicsState;
/// <summary>
/// Constant buffers bound at the time the shader was compiled, per stage.
/// </summary>
public Array5<uint> ConstantBufferUse;
/// <summary>
/// Pipeline state captured at the time of shader use.
/// </summary>
public ProgramPipelineState? PipelineState;
/// <summary>
/// Transform feedback buffers active at the time the shader was compiled.
/// </summary>
public TransformFeedbackDescriptor[] TransformFeedbackDescriptors;
/// <summary>
/// Flags indicating texture state that is used by the shader.
/// </summary>
[Flags]
private enum QueriedTextureStateFlags
{
TextureFormat = 1 << 0,
SamplerType = 1 << 1,
CoordNormalized = 1 << 2
}
/// <summary>
/// Reference type wrapping a value.
/// </summary>
private class Box<T>
{
/// <summary>
/// Wrapped value.
/// </summary>
public T Value;
}
/// <summary>
/// State of a texture or image that is accessed by the shader.
/// </summary>
private struct TextureSpecializationState
{
// New fields should be added to the end of the struct to keep disk shader cache compatibility.
/// <summary>
/// Flags indicating which state of the texture the shader depends on.
/// </summary>
public QueriedTextureStateFlags QueriedFlags;
/// <summary>
/// Encoded texture format value.
/// </summary>
public uint Format;
/// <summary>
/// True if the texture format is sRGB, false otherwise.
/// </summary>
public bool FormatSrgb;
/// <summary>
/// Texture target.
/// </summary>
public TextureTarget TextureTarget;
/// <summary>
/// Indicates if the coordinates used to sample the texture are normalized or not (0.0..1.0 or 0..Width/Height).
/// </summary>
public bool CoordNormalized;
}
/// <summary>
/// Texture binding information, used to identify each texture accessed by the shader.
/// </summary>
private readonly record struct TextureKey
{
// New fields should be added to the end of the struct to keep disk shader cache compatibility.
/// <summary>
/// Shader stage where the texture is used.
/// </summary>
public readonly int StageIndex;
/// <summary>
/// Texture handle offset in words on the texture buffer.
/// </summary>
public readonly int Handle;
/// <summary>
/// Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register).
/// </summary>
public readonly int CbufSlot;
/// <summary>
/// Creates a new texture key.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Texture handle offset in words on the texture buffer</param>
/// <param name="cbufSlot">Constant buffer slot of the texture buffer (-1 to use the texture buffer index GPU register)</param>
public TextureKey(int stageIndex, int handle, int cbufSlot)
{
StageIndex = stageIndex;
Handle = handle;
CbufSlot = cbufSlot;
}
}
private readonly Dictionary<TextureKey, Box<TextureSpecializationState>> _textureSpecialization;
private KeyValuePair<TextureKey, Box<TextureSpecializationState>>[] _allTextures;
private Box<TextureSpecializationState>[][] _textureByBinding;
private Box<TextureSpecializationState>[][] _imageByBinding;
/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
private ShaderSpecializationState()
{
_textureSpecialization = new Dictionary<TextureKey, Box<TextureSpecializationState>>();
}
/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
/// <param name="state">Current compute engine state</param>
public ShaderSpecializationState(ref GpuChannelComputeState state) : this()
{
ComputeState = state;
_compute = true;
}
/// <summary>
/// Creates a new instance of the shader specialization state.
/// </summary>
/// <param name="state">Current 3D engine state</param>
/// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
private ShaderSpecializationState(ref GpuChannelGraphicsState state, TransformFeedbackDescriptor[] descriptors) : this()
{
GraphicsState = state;
_compute = false;
if (descriptors != null)
{
TransformFeedbackDescriptors = descriptors;
_queriedState |= QueriedStateFlags.TransformFeedback;
}
}
/// <summary>
/// Prepare the shader specialization state for quick binding lookups.
/// </summary>
/// <remarks>
/// Builds, for each stage that has program info, one array for its textures and one for its images,
/// holding the specialization state of each entry in declaration order.
/// Stages that are null or have no program info keep a null entry on both tables.
/// </remarks>
/// <param name="stages">The shader stages</param>
public void Prepare(CachedShaderStage[] stages)
{
    _allTextures = _textureSpecialization.ToArray();

    int stageCount = stages.Length;

    _textureByBinding = new Box<TextureSpecializationState>[stageCount][];
    _imageByBinding = new Box<TextureSpecializationState>[stageCount][];

    for (int slot = 0; slot < stageCount; slot++)
    {
        var info = stages[slot]?.Info;

        if (info == null)
        {
            continue;
        }

        // Don't count VertexA for looking up spec state. No-Op for compute.
        int stageIndex = Math.Max(slot - 1, 0);

        var textures = info.Textures;
        var images = info.Images;

        var textureStates = new Box<TextureSpecializationState>[textures.Count];
        var imageStates = new Box<TextureSpecializationState>[images.Count];

        for (int textureIndex = 0; textureIndex < textureStates.Length; textureIndex++)
        {
            var descriptor = textures[textureIndex];

            textureStates[textureIndex] = GetTextureSpecState(stageIndex, descriptor.HandleIndex, descriptor.CbufSlot);
        }

        for (int imageIndex = 0; imageIndex < imageStates.Length; imageIndex++)
        {
            var descriptor = images[imageIndex];

            imageStates[imageIndex] = GetTextureSpecState(stageIndex, descriptor.HandleIndex, descriptor.CbufSlot);
        }

        _textureByBinding[slot] = textureStates;
        _imageByBinding[slot] = imageStates;
    }
}
/// <summary>
/// Creates a graphics specialization state that also records the program pipeline state.
/// </summary>
/// <param name="state">Current 3D engine state</param>
/// <param name="pipelineState">Current program pipeline state</param>
/// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
public ShaderSpecializationState(
    ref GpuChannelGraphicsState state,
    ref ProgramPipelineState pipelineState,
    TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
    => PipelineState = pipelineState;
/// <summary>
/// Creates a graphics specialization state with an optional program pipeline state.
/// </summary>
/// <param name="state">Current 3D engine state</param>
/// <param name="pipelineState">Current program pipeline state, or null when there is none</param>
/// <param name="descriptors">Optional transform feedback buffers in use, if any</param>
public ShaderSpecializationState(
    ref GpuChannelGraphicsState state,
    ProgramPipelineState? pipelineState,
    TransformFeedbackDescriptor[] descriptors) : this(ref state, descriptors)
    => PipelineState = pipelineState;
/// <summary>
/// Flags the early Z force state as queried by the shader.
/// </summary>
public void RecordEarlyZForce() => _queriedState |= QueriedStateFlags.EarlyZForce;
/// <summary>
/// Flags the primitive topology state as queried by the shader.
/// </summary>
public void RecordPrimitiveTopology() => _queriedState |= QueriedStateFlags.PrimitiveTopology;
/// <summary>
/// Flags the tessellation mode state as queried by the shader.
/// </summary>
public void RecordTessellationMode() => _queriedState |= QueriedStateFlags.TessellationMode;
/// <summary>
/// Records the constant buffer use mask seen by a stage and marks that stage as having one recorded.
/// </summary>
/// <param name="stageIndex">Shader stage index</param>
/// <param name="useMask">Mask indicating the constant buffers bound at the time of the shader compilation</param>
public void RecordConstantBufferUse(int stageIndex, uint useMask)
{
    byte stageBit = (byte)(1 << stageIndex);

    ConstantBufferUse[stageIndex] = useMask;
    _constantBufferUsePerStage |= stageBit;
}
/// <summary>
/// Registers a texture accessed by the shader, taking its properties from a texture descriptor.
/// An existing entry for the same key is overwritten.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <param name="descriptor">Descriptor of the texture</param>
public void RegisterTexture(int stageIndex, int handle, int cbufSlot, Image.TextureDescriptor descriptor)
{
    ref TextureSpecializationState spec = ref GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value;

    spec.Format = descriptor.UnpackFormat();
    spec.FormatSrgb = descriptor.UnpackSrgb();
    spec.TextureTarget = descriptor.UnpackTextureTarget();
    spec.CoordNormalized = descriptor.UnpackTextureCoordNormalized();
}
/// <summary>
/// Registers a texture accessed by the shader from explicitly supplied properties.
/// An existing entry for the same key is overwritten.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <param name="format">Maxwell texture format value</param>
/// <param name="formatSrgb">Whether the texture format is a sRGB format</param>
/// <param name="target">Texture target type</param>
/// <param name="coordNormalized">Whether the texture coordinates used on the shader are considered normalized</param>
public void RegisterTexture(
    int stageIndex,
    int handle,
    int cbufSlot,
    uint format,
    bool formatSrgb,
    TextureTarget target,
    bool coordNormalized)
{
    ref TextureSpecializationState spec = ref GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value;

    spec.Format = format;
    spec.FormatSrgb = formatSrgb;
    spec.TextureTarget = target;
    spec.CoordNormalized = coordNormalized;
}
/// <summary>
/// Flags the format of a texture as having been used during shader translation.
/// The texture entry is created if it does not exist yet.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
public void RecordTextureFormat(int stageIndex, int handle, int cbufSlot)
    => GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.TextureFormat;
/// <summary>
/// Flags the sampler type (target) of a texture as having been used during shader translation.
/// The texture entry is created if it does not exist yet.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
public void RecordTextureSamplerType(int stageIndex, int handle, int cbufSlot)
    => GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.SamplerType;
/// <summary>
/// Flags the coordinate normalization state of a texture as having been used during shader translation.
/// The texture entry is created if it does not exist yet.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
public void RecordTextureCoordNormalized(int stageIndex, int handle, int cbufSlot)
    => GetOrCreateTextureSpecState(stageIndex, handle, cbufSlot).Value.QueriedFlags |= QueriedTextureStateFlags.CoordNormalized;
/// <summary>
/// Tells whether the shader queried the primitive topology.
/// </summary>
/// <returns>True if queried, false otherwise</returns>
public bool IsPrimitiveTopologyQueried() => _queriedState.HasFlag(QueriedStateFlags.PrimitiveTopology);
/// <summary>
/// Checks if a given texture was registered on this specialization state.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>True if an entry exists for the texture, false otherwise</returns>
public bool TextureRegistered(int stageIndex, int handle, int cbufSlot)
    => GetTextureSpecState(stageIndex, handle, cbufSlot) is not null;
/// <summary>
/// Gets the recorded format of a given texture.
/// The texture must have been registered: the lookup returns null for an unknown key and is dereferenced unchecked.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>The recorded format value and whether it is sRGB</returns>
public (uint, bool) GetFormat(int stageIndex, int handle, int cbufSlot)
{
    Box<TextureSpecializationState> recorded = GetTextureSpecState(stageIndex, handle, cbufSlot);

    return (recorded.Value.Format, recorded.Value.FormatSrgb);
}
/// <summary>
/// Gets the recorded target of a given texture.
/// The texture must have been registered: the lookup returns null for an unknown key and is dereferenced unchecked.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>The recorded texture target</returns>
public TextureTarget GetTextureTarget(int stageIndex, int handle, int cbufSlot)
{
    Box<TextureSpecializationState> recorded = GetTextureSpecState(stageIndex, handle, cbufSlot);

    return recorded.Value.TextureTarget;
}
/// <summary>
/// Gets the recorded coordinate normalization state of a given texture.
/// The texture must have been registered: the lookup returns null for an unknown key and is dereferenced unchecked.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>The recorded coordinate normalization flag</returns>
public bool GetCoordNormalized(int stageIndex, int handle, int cbufSlot)
{
    Box<TextureSpecializationState> recorded = GetTextureSpecState(stageIndex, handle, cbufSlot);

    return recorded.Value.CoordNormalized;
}
/// <summary>
/// Looks up the specialization state of a texture, adding a fresh entry when none exists yet.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Texture specialization state (never null)</returns>
private Box<TextureSpecializationState> GetOrCreateTextureSpecState(int stageIndex, int handle, int cbufSlot)
{
    var key = new TextureKey(stageIndex, handle, cbufSlot);

    if (_textureSpecialization.TryGetValue(key, out Box<TextureSpecializationState> existing))
    {
        return existing;
    }

    var created = new Box<TextureSpecializationState>();
    _textureSpecialization.Add(key, created);

    return created;
}
/// <summary>
/// Looks up the specialization state of a texture without creating it.
/// </summary>
/// <param name="stageIndex">Shader stage where the texture is used</param>
/// <param name="handle">Offset in words of the texture handle on the texture buffer</param>
/// <param name="cbufSlot">Slot of the texture buffer constant buffer</param>
/// <returns>Texture specialization state, or null if the texture was never registered</returns>
private Box<TextureSpecializationState> GetTextureSpecState(int stageIndex, int handle, int cbufSlot)
{
    var key = new TextureKey(stageIndex, handle, cbufSlot);

    return _textureSpecialization.TryGetValue(key, out Box<TextureSpecializationState> found) ? found : null;
}
/// <summary>
/// Compares the recorded 3D engine state with the current one, then defers to the
/// shared constant buffer and texture checks.
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="graphicsState">Graphics state</param>
/// <param name="usesDrawParameters">Indicates whether the vertex shader accesses draw parameters</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesGraphics(
    GpuChannel channel,
    ref GpuChannelPoolState poolState,
    ref GpuChannelGraphicsState graphicsState,
    bool usesDrawParameters,
    bool checkTextures)
{
    // Alpha to coverage dithering only counts as enabled when alpha to coverage itself is enabled.
    bool recordedA2cDither = GraphicsState.AlphaToCoverageEnable && GraphicsState.AlphaToCoverageDitherEnable;
    bool currentA2cDither = graphicsState.AlphaToCoverageEnable && graphicsState.AlphaToCoverageDitherEnable;

    // State that must always be identical.
    if (graphicsState.ViewportTransformDisable != GraphicsState.ViewportTransformDisable ||
        currentA2cDither != recordedA2cDither ||
        graphicsState.DepthMode != GraphicsState.DepthMode ||
        graphicsState.AlphaTestEnable != GraphicsState.AlphaTestEnable)
    {
        return false;
    }

    // The alpha test function and reference value are only compared while the test is enabled.
    if (graphicsState.AlphaTestEnable)
    {
        if (graphicsState.AlphaTestCompare != GraphicsState.AlphaTestCompare ||
            graphicsState.AlphaTestReference != GraphicsState.AlphaTestReference)
        {
            return false;
        }
    }

    if (!graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan()))
    {
        return false;
    }

    // The draw parameters flag is only compared when the vertex shader reads draw parameters.
    if (usesDrawParameters &&
        graphicsState.HasConstantBufferDrawParameters != GraphicsState.HasConstantBufferDrawParameters)
    {
        return false;
    }

    if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
    {
        return false;
    }

    // Fragment output types are only compared when the channel capabilities request it.
    if (channel.Capabilities.NeedsFragmentOutputSpecialization &&
        !graphicsState.FragmentOutputTypes.AsSpan().SequenceEqual(GraphicsState.FragmentOutputTypes.AsSpan()))
    {
        return false;
    }

    return graphicsState.DualSourceBlendEnable == GraphicsState.DualSourceBlendEnable &&
        Matches(channel, ref poolState, checkTextures, isCompute: false);
}
/// <summary>
/// Compares the recorded compute engine state with the current one, then defers to the
/// shared constant buffer and texture checks.
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesCompute(GpuChannel channel, ref GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
    => computeState.HasUnalignedStorageBuffer == ComputeState.HasUnalignedStorageBuffer &&
        Matches(channel, ref poolState, checkTextures, isCompute: true);
/// <summary>
/// Refreshes the cached texture and sampler constant buffer spans when the stage or
/// either buffer index differs from what is currently cached.
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="isCompute">Indicates whether the check is requested by the compute engine (true) or the 3D engine (false)</param>
/// <param name="cachedTextureBufferIndex">The currently cached texture buffer index</param>
/// <param name="cachedSamplerBufferIndex">The currently cached sampler buffer index</param>
/// <param name="cachedTextureBuffer">The currently cached texture buffer data</param>
/// <param name="cachedSamplerBuffer">The currently cached sampler buffer data</param>
/// <param name="cachedStageIndex">The currently cached stage</param>
/// <param name="textureBufferIndex">The new texture buffer index</param>
/// <param name="samplerBufferIndex">The new sampler buffer index</param>
/// <param name="stageIndex">Stage index of the constant buffer</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void UpdateCachedBuffer(
    GpuChannel channel,
    bool isCompute,
    scoped ref int cachedTextureBufferIndex,
    scoped ref int cachedSamplerBufferIndex,
    scoped ref ReadOnlySpan<int> cachedTextureBuffer,
    scoped ref ReadOnlySpan<int> cachedSamplerBuffer,
    scoped ref int cachedStageIndex,
    int textureBufferIndex,
    int samplerBufferIndex,
    int stageIndex)
{
    bool stageChange = stageIndex != cachedStageIndex;

    // Set when the sampler cache was already refreshed together with the texture cache,
    // so the same buffer is not fetched a second time on a stage change.
    bool samplerRefreshed = false;

    if (stageChange || textureBufferIndex != cachedTextureBufferIndex)
    {
        ref BufferBounds bounds = ref channel.BufferManager.GetUniformBufferBounds(isCompute, stageIndex, textureBufferIndex);

        cachedTextureBuffer = MemoryMarshal.Cast<byte, int>(channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size));
        cachedTextureBufferIndex = textureBufferIndex;

        if (samplerBufferIndex == textureBufferIndex)
        {
            // Both handles live in the same constant buffer; share the span.
            cachedSamplerBuffer = cachedTextureBuffer;
            cachedSamplerBufferIndex = samplerBufferIndex;
            samplerRefreshed = true;
        }
    }

    if (!samplerRefreshed && (stageChange || samplerBufferIndex != cachedSamplerBufferIndex))
    {
        ref BufferBounds bounds = ref channel.BufferManager.GetUniformBufferBounds(isCompute, stageIndex, samplerBufferIndex);

        cachedSamplerBuffer = MemoryMarshal.Cast<byte, int>(channel.MemoryManager.Physical.GetSpan(bounds.Address, (int)bounds.Size));
        cachedSamplerBufferIndex = samplerBufferIndex;
    }

    cachedStageIndex = stageIndex;
}
/// <summary>
/// Checks the state shared by graphics and compute: the recorded constant buffer use masks
/// and, optionally, the recorded texture state against the descriptors currently in the pool.
/// </summary>
/// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <param name="isCompute">Indicates whether the check is requested by the compute engine (true) or the 3D engine (false)</param>
/// <returns>True if the state matches, false otherwise</returns>
private bool Matches(GpuChannel channel, ref GpuChannelPoolState poolState, bool checkTextures, bool isCompute)
{
    // Visit every stage that recorded a constant buffer use mask, lowest bit first.
    // "pending & (pending - 1)" clears the lowest set bit, which is the one just visited.
    for (int pending = _constantBufferUsePerStage; pending != 0; pending &= pending - 1)
    {
        int stage = BitOperations.TrailingZeroCount(pending);

        uint currentMask = isCompute
            ? channel.BufferManager.GetComputeUniformBufferUseMask()
            : channel.BufferManager.GetGraphicsUniformBufferUseMask(stage);

        if (ConstantBufferUse[stage] != currentMask)
        {
            return false;
        }
    }

    if (!checkTextures)
    {
        return true;
    }

    TexturePool pool = channel.TextureManager.GetTexturePool(poolState.TexturePoolGpuVa, poolState.TexturePoolMaximumId);

    // Constant buffer spans are cached across iterations and refreshed only when needed.
    int lastTextureBufferIndex = -1;
    int lastSamplerBufferIndex = -1;
    int lastStageIndex = -1;

    ReadOnlySpan<int> textureWords = Span<int>.Empty;
    ReadOnlySpan<int> samplerWords = Span<int>.Empty;

    var entries = _allTextures;

    for (int i = 0; i < entries.Length; i++)
    {
        TextureKey key = entries[i].Key;

        (int textureBufferIndex, int samplerBufferIndex) = TextureHandle.UnpackSlots(key.CbufSlot, poolState.TextureBufferIndex);

        UpdateCachedBuffer(
            channel,
            isCompute,
            ref lastTextureBufferIndex,
            ref lastSamplerBufferIndex,
            ref textureWords,
            ref samplerWords,
            ref lastStageIndex,
            textureBufferIndex,
            samplerBufferIndex,
            key.StageIndex);

        int packedId = TextureHandle.ReadPackedId(key.Handle, textureWords, samplerWords);
        int textureId = TextureHandle.UnpackTextureId(packedId);

        // Ids the pool does not consider valid are skipped rather than treated as a mismatch.
        if (!pool.IsValidId(textureId))
        {
            continue;
        }

        ref readonly Image.TextureDescriptor descriptor = ref pool.GetDescriptorRef(textureId);

        if (!MatchesTexture(entries[i].Value, descriptor))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Checks if the recorded texture state matches the given texture descriptor.
/// Only the coordinate normalization flag is compared, and only when the shader queried it;
/// a null state always matches.
/// </summary>
/// <param name="specializationState">Texture specialization state, may be null</param>
/// <param name="descriptor">Texture descriptor</param>
/// <returns>True if the state matches, false otherwise</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private bool MatchesTexture(Box<TextureSpecializationState> specializationState, in Image.TextureDescriptor descriptor)
{
    return specializationState == null ||
        !specializationState.Value.QueriedFlags.HasFlag(QueriedTextureStateFlags.CoordNormalized) ||
        specializationState.Value.CoordNormalized == descriptor.UnpackTextureCoordNormalized();
}
/// <summary>
/// Checks if the state recorded for a texture binding matches a texture descriptor.
/// Uses the lookup table built by <see cref="Prepare"/>.
/// </summary>
/// <param name="stage">The shader stage</param>
/// <param name="index">The texture binding index within the stage</param>
/// <param name="descriptor">Texture descriptor</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesTexture(ShaderStage stage, int index, in Image.TextureDescriptor descriptor)
    => MatchesTexture(_textureByBinding[(int)stage][index], descriptor);
/// <summary>
/// Checks if the state recorded for an image binding matches a texture descriptor.
/// Uses the lookup table built by <see cref="Prepare"/>.
/// </summary>
/// <param name="stage">The shader stage</param>
/// <param name="index">The image binding index within the stage</param>
/// <param name="descriptor">Texture descriptor</param>
/// <returns>True if the state matches, false otherwise</returns>
public bool MatchesImage(ShaderStage stage, int index, in Image.TextureDescriptor descriptor)
    => MatchesTexture(_imageByBinding[(int)stage][index], descriptor);
/// <summary>
/// Reads shader specialization state that has been serialized.
/// The fields are read in the same order that <see cref="Write"/> emits them.
/// This method does not call <see cref="Prepare"/> on the returned state.
/// </summary>
/// <param name="dataReader">Data reader</param>
/// <returns>Shader specialization state</returns>
public static ShaderSpecializationState Read(ref BinarySerializer dataReader)
{
ShaderSpecializationState specState = new ShaderSpecializationState();
// Header: queried state flags, then the compute/graphics discriminator.
dataReader.Read(ref specState._queriedState);
dataReader.Read(ref specState._compute);
// Exactly one engine state block follows, selected by the discriminator.
if (specState._compute)
{
dataReader.ReadWithMagicAndSize(ref specState.ComputeState, ComsMagic);
}
else
{
dataReader.ReadWithMagicAndSize(ref specState.GraphicsState, GfxsMagic);
}
// Bit mask of stages with a recorded constant buffer use mask, followed by
// one use mask per set bit, lowest bit first.
dataReader.Read(ref specState._constantBufferUsePerStage);
int constantBufferUsePerStageMask = specState._constantBufferUsePerStage;
while (constantBufferUsePerStageMask != 0)
{
int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
dataReader.Read(ref specState.ConstantBufferUse[index]);
constantBufferUsePerStageMask &= ~(1 << index);
}
// Optional pipeline state, preceded by a presence flag.
bool hasPipelineState = false;
dataReader.Read(ref hasPipelineState);
if (hasPipelineState)
{
ProgramPipelineState pipelineState = default;
dataReader.ReadWithMagicAndSize(ref pipelineState, PgpsMagic);
specState.PipelineState = pipelineState;
}
// Transform feedback descriptors are only present when the flag is set:
// a 16-bit count followed by that many descriptors.
if (specState._queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
{
ushort tfCount = 0;
dataReader.Read(ref tfCount);
specState.TransformFeedbackDescriptors = new TransformFeedbackDescriptor[tfCount];
for (int index = 0; index < tfCount; index++)
{
dataReader.ReadWithMagicAndSize(ref specState.TransformFeedbackDescriptors[index], TfbdMagic);
}
}
// Texture table: a 16-bit count followed by (key, state) pairs.
ushort count = 0;
dataReader.Read(ref count);
for (int index = 0; index < count; index++)
{
TextureKey textureKey = default;
Box<TextureSpecializationState> textureState = new Box<TextureSpecializationState>();
dataReader.ReadWithMagicAndSize(ref textureKey, TexkMagic);
dataReader.ReadWithMagicAndSize(ref textureState.Value, TexsMagic);
// The indexer is used, so a repeated key replaces the earlier entry instead of throwing.
specState._textureSpecialization[textureKey] = textureState;
}
return specState;
}
/// <summary>
/// Serializes the shader specialization state.
/// The fields are written in the same order that <see cref="Read"/> consumes them.
/// </summary>
/// <param name="dataWriter">Data writer</param>
public void Write(ref BinarySerializer dataWriter)
{
// Header: queried state flags, then the compute/graphics discriminator.
dataWriter.Write(ref _queriedState);
dataWriter.Write(ref _compute);
// Exactly one engine state block is written, selected by the discriminator.
if (_compute)
{
dataWriter.WriteWithMagicAndSize(ref ComputeState, ComsMagic);
}
else
{
dataWriter.WriteWithMagicAndSize(ref GraphicsState, GfxsMagic);
}
// Bit mask of stages with a recorded constant buffer use mask, followed by
// one use mask per set bit, lowest bit first.
dataWriter.Write(ref _constantBufferUsePerStage);
int constantBufferUsePerStageMask = _constantBufferUsePerStage;
while (constantBufferUsePerStageMask != 0)
{
int index = BitOperations.TrailingZeroCount(constantBufferUsePerStageMask);
dataWriter.Write(ref ConstantBufferUse[index]);
constantBufferUsePerStageMask &= ~(1 << index);
}
// Optional pipeline state, preceded by a presence flag.
bool hasPipelineState = PipelineState.HasValue;
dataWriter.Write(ref hasPipelineState);
if (hasPipelineState)
{
// Copied to a local because the writer needs a ref to the value.
ProgramPipelineState pipelineState = PipelineState.Value;
dataWriter.WriteWithMagicAndSize(ref pipelineState, PgpsMagic);
}
// Transform feedback descriptors are only written when the flag is set.
if (_queriedState.HasFlag(QueriedStateFlags.TransformFeedback))
{
// NOTE(review): the count is stored as 16 bits; presumably the descriptor count always fits - confirm.
ushort tfCount = (ushort)TransformFeedbackDescriptors.Length;
dataWriter.Write(ref tfCount);
for (int index = 0; index < TransformFeedbackDescriptors.Length; index++)
{
dataWriter.WriteWithMagicAndSize(ref TransformFeedbackDescriptors[index], TfbdMagic);
}
}
// Texture table: a 16-bit count followed by (key, state) pairs.
// NOTE(review): the count is stored as 16 bits; presumably the texture count always fits - confirm.
ushort count = (ushort)_textureSpecialization.Count;
dataWriter.Write(ref count);
foreach (var kv in _textureSpecialization)
{
// Copied to locals because the writer needs refs to the values.
var textureKey = kv.Key;
var textureState = kv.Value;
dataWriter.WriteWithMagicAndSize(ref textureKey, TexkMagic);
dataWriter.WriteWithMagicAndSize(ref textureState.Value, TexsMagic);
}
}
}
}

View file

@ -0,0 +1,58 @@
using Ryujinx.Common.Memory;
using System;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Gpu.Shader
{
/// <summary>
/// Transform feedback descriptor.
/// </summary>
struct TransformFeedbackDescriptor
{
    // New fields should be added to the end of the struct to keep disk shader cache compatibility.

    /// <summary>
    /// Index of the transform feedback.
    /// </summary>
    public readonly int BufferIndex;

    /// <summary>
    /// Amount of bytes consumed per vertex.
    /// </summary>
    public readonly int Stride;

    /// <summary>
    /// Number of varyings written into the buffer.
    /// </summary>
    public readonly int VaryingCount;

    /// <summary>
    /// Location of varyings to be written into the buffer. Each byte is one location.
    /// </summary>
    public Array32<uint> VaryingLocations; // Making this readonly breaks AsSpan

    /// <summary>
    /// Creates a new transform feedback descriptor.
    /// </summary>
    /// <param name="bufferIndex">Index of the transform feedback</param>
    /// <param name="stride">Amount of bytes consumed per vertex</param>
    /// <param name="varyingCount">Number of varyings written into the buffer. Indicates size in bytes of <paramref name="varyingLocations"/></param>
    /// <param name="varyingLocations">Location of varyings to be written into the buffer. Each byte is one location</param>
    public TransformFeedbackDescriptor(int bufferIndex, int stride, int varyingCount, ref Array32<uint> varyingLocations)
    {
        BufferIndex = bufferIndex;
        Stride = stride;
        VaryingCount = varyingCount;
        VaryingLocations = varyingLocations;
    }

    /// <summary>
    /// Gets a span of the <see cref="VaryingLocations"/>, one byte per varying location.
    /// </summary>
    /// <remarks>
    /// The length is <see cref="VaryingCount"/> clamped to the range from zero to the byte size
    /// of <see cref="VaryingLocations"/>, so an out-of-range count yields a shorter or empty
    /// span instead of an exception.
    /// </remarks>
    /// <returns>Span of varying locations</returns>
    public ReadOnlySpan<byte> AsSpan()
    {
        ReadOnlySpan<byte> locationBytes = MemoryMarshal.Cast<uint, byte>(VaryingLocations.AsSpan());

        // The upper bound comes from the storage itself rather than a hard-coded byte size.
        int count = Math.Clamp(VaryingCount, 0, locationBytes.Length);

        return locationBytes.Slice(0, count);
    }
}
}