GPU: Add HLE macros for popular NVN macros (#5761)

* GPU: Add HLE macros for popular NVN macros

* Remove non-vector equality check

The case where it's not hardware accelerated will do the check integer-wise anyways.

* Whitespace 😔

* Address Feedback
This commit is contained in:
riperiperi 2023-10-06 23:55:07 +01:00 committed by GitHub
parent 086564c3c8
commit f460ecc182
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 467 additions and 14 deletions

View file

@ -1,7 +1,10 @@
using Ryujinx.Common.Logging;
using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using System;
using System.Collections.Generic;
@ -15,9 +18,18 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
// State offsets used by the HLE macro implementations below.
private const int ColorLayerCountOffset = 0x818;
private const int ColorStructSize = 0x40;
private const int ZetaLayerCountOffset = 0x1230;
// Base offset written by BindShaderProgram (plus a stage-derived offset) after a program is bound.
private const int UniformBufferBindVertexOffset = 0x2410;
// Read by DrawElements and passed to the draw call as the first vertex.
private const int FirstVertexOffset = 0x1434;
private const int IndirectIndexedDataEntrySize = 0x14;
// UpdateBlendState writes 0 to this offset before updating the blend enables.
private const int LogicOpOffset = 0x19c4;
// Base of the per-index slots where BindShaderProgram keeps the last bound shader ID.
private const int ShaderIdScratchOffset = 0x3470;
// Base of the per-index slots where BindShaderProgram keeps the last bound shader offset.
private const int ShaderAddressScratchOffset = 0x3488;
// Bases of the address and size slots read by UpdateUniformBufferState, indexed by its argument.
private const int UpdateConstantBufferAddressesBase = 0x34a8;
private const int UpdateConstantBufferSizesBase = 0x34bc;
// Address slot read by UpdateUniformBufferStateCbu and UpdateUniformBufferStateCbuV2.
private const int UpdateConstantBufferAddressCbu = 0x3460;
private readonly GPFifoProcessor _processor;
private readonly MacroHLEFunctionName _functionName;
@ -49,6 +61,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
{
switch (_functionName)
{
case MacroHLEFunctionName.BindShaderProgram:
BindShaderProgram(state, arg0);
break;
case MacroHLEFunctionName.ClearColor:
ClearColor(state, arg0);
break;
@ -58,6 +73,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
case MacroHLEFunctionName.DrawArraysInstanced:
DrawArraysInstanced(state, arg0);
break;
case MacroHLEFunctionName.DrawElements:
DrawElements(state, arg0);
break;
case MacroHLEFunctionName.DrawElementsInstanced:
DrawElementsInstanced(state, arg0);
break;
@ -67,6 +85,21 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
case MacroHLEFunctionName.MultiDrawElementsIndirectCount:
MultiDrawElementsIndirectCount(state, arg0);
break;
case MacroHLEFunctionName.UpdateBlendState:
UpdateBlendState(state, arg0);
break;
case MacroHLEFunctionName.UpdateColorMasks:
UpdateColorMasks(state, arg0);
break;
case MacroHLEFunctionName.UpdateUniformBufferState:
UpdateUniformBufferState(state, arg0);
break;
case MacroHLEFunctionName.UpdateUniformBufferStateCbu:
UpdateUniformBufferStateCbu(state, arg0);
break;
case MacroHLEFunctionName.UpdateUniformBufferStateCbuV2:
UpdateUniformBufferStateCbuV2(state, arg0);
break;
default:
throw new NotImplementedException(_functionName.ToString());
}
@ -75,6 +108,149 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
Fifo.Clear();
}
/// <summary>
/// Binds the shader program for the index in arg0, unless the pushed program ID
/// matches the one recorded by the previous bind for that index.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void BindShaderProgram(IDeviceState state, int arg0)
{
    int idSlot = ShaderIdScratchOffset + arg0 * 4;
    int cachedId = state.Read(idSlot);

    int programId = FetchParam().Word;
    int programOffset = FetchParam().Word;

    if (cachedId != programId)
    {
        _processor.ThreedClass.SetShaderOffset(arg0, (uint)programOffset);

        // The original macro masks the method address so that an overflowing
        // address cannot spill into the increment portion; the same mask is applied here.
        int methodMask = unchecked((int)0xfffc0fff) << 2;
        int offsetSlot = ShaderAddressScratchOffset + arg0 * 4;

        // Record what was bound so an identical bind can be skipped next time.
        state.Write(idSlot & methodMask, programId);
        state.Write(offsetSlot & methodMask, programOffset);

        int stage = FetchParam().Word;
        uint packedCbAddress = (uint)FetchParam().Word;
        uint cbAddressHigh = packedCbAddress >> 24;
        uint cbAddressLow = packedCbAddress << 8;

        _processor.ThreedClass.UpdateUniformBufferState(65536, cbAddressHigh, cbAddressLow);

        int bindSlot = UniformBufferBindVertexOffset + ((stage & 0x7f) << 3) * 4;

        state.Write(bindSlot & methodMask, 17);
    }
    else
    {
        // Same program as last time: consume the two remaining parameters and do nothing.
        FetchParam();
        FetchParam();
    }
}
/// <summary>
/// Updates the uniform buffer state from the address and size slots selected by arg0.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferState(IDeviceState state, int arg0)
{
    int slotOffset = arg0 * 4;

    uint packedAddress = (uint)state.Read(UpdateConstantBufferAddressesBase + slotOffset);
    int bufferSize = state.Read(UpdateConstantBufferSizesBase + slotOffset);

    // The stored word is split into a high part (top 8 bits) and a low part (remaining bits shifted up by 8).
    uint addressHigh = packedAddress >> 24;
    uint addressLow = packedAddress << 8;

    _processor.ThreedClass.UpdateUniformBufferState(bufferSize, addressHigh, addressLow);
}
/// <summary>
/// Updates the uniform buffer state for an update, using a fixed size of 24320
/// and a word offset given by arg0.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferStateCbu(IDeviceState state, int arg0)
{
    uint packedAddress = (uint)state.Read(UpdateConstantBufferAddressCbu);

    // Split the stored word into the high and low address parts.
    uint addressHigh = packedAddress >> 24;
    uint addressLow = packedAddress << 8;

    // arg0 counts 32-bit words; the state offset is arg0 multiplied by 4.
    int byteOffset = arg0 << 2;

    UniformBufferState ubState = new()
    {
        Address = new() { High = addressHigh, Low = addressLow },
        Size = 24320,
        Offset = byteOffset,
    };

    _processor.ThreedClass.UpdateUniformBufferState(ubState);
}
/// <summary>
/// Updates the uniform buffer state for an update, using a fixed size of 28672
/// and a word offset given by arg0.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateUniformBufferStateCbuV2(IDeviceState state, int arg0)
{
    uint packedAddress = (uint)state.Read(UpdateConstantBufferAddressCbu);

    // Split the stored word into the high and low address parts.
    uint addressHigh = packedAddress >> 24;
    uint addressLow = packedAddress << 8;

    // arg0 counts 32-bit words; the state offset is arg0 multiplied by 4.
    int byteOffset = arg0 << 2;

    UniformBufferState ubState = new()
    {
        Address = new() { High = addressHigh, Low = addressLow },
        Size = 28672,
        Offset = byteOffset,
    };

    _processor.ThreedClass.UpdateUniformBufferState(ubState);
}
/// <summary>
/// Writes 0 to the logic op state and updates the eight blend enables from bits 8 to 15 of arg0.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateBlendState(IDeviceState state, int arg0)
{
    state.Write(LogicOpOffset, 0);

    // Bit (8 + n) of arg0 holds the enable flag for entry n.
    uint enableBits = (uint)(arg0 >> 8);

    Array8<Boolean32> enable = new();

    for (int target = 0; target < 8; target++)
    {
        enable[target] = new Boolean32((enableBits >> target) & 1);
    }

    _processor.ThreedClass.UpdateBlendEnable(ref enable);
}
/// <summary>
/// Updates the eight color masks from arg0 and three further pushed arguments;
/// each word carries two masks, one in its low half and one in its high half.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void UpdateColorMasks(IDeviceState state, int arg0)
{
    Array8<RtColorMask> masks = new();

    int packed = arg0;

    for (int slot = 0; slot < 8; slot += 2)
    {
        // The first pair comes from arg0, every later pair from the next pushed parameter.
        if (slot != 0)
        {
            packed = FetchParam().Word;
        }

        masks[slot] = new RtColorMask((uint)packed & 0x1fff);
        masks[slot + 1] = new RtColorMask(((uint)packed >> 16) & 0x1fff);
    }

    _processor.ThreedClass.UpdateColorMasks(ref masks);
}
/// <summary>
/// Clears one bound color target.
/// </summary>
@ -129,6 +305,36 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
indexed: false);
}
/// <summary>
/// Performs an indexed draw of a single instance, taking the topology from arg0 and
/// the index buffer address, index type and index count from the pushed arguments.
/// </summary>
/// <param name="state">GPU state at the time of the call</param>
/// <param name="arg0">First argument of the call</param>
private void DrawElements(IDeviceState state, int arg0)
{
    // Parameters must be consumed in this exact order: address high, address low, index type, index count.
    uint indexAddressHigh = (uint)FetchParam().Word;
    uint indexAddressLow = (uint)FetchParam().Word;
    IndexType indexType = (IndexType)FetchParam().Word;
    int indexCount = FetchParam().Word;

    _processor.ThreedClass.UpdateIndexBuffer(indexAddressHigh, indexAddressLow, indexType);

    int firstIndex = 0;
    int firstVertex = state.Read(FirstVertexOffset);

    _processor.ThreedClass.Draw(
        (PrimitiveTopology)arg0,
        indexCount,
        1,
        firstIndex,
        firstVertex,
        0,
        indexed: true);
}
/// <summary>
/// Performs an indexed draw.
/// </summary>

View file

@ -6,11 +6,19 @@
enum MacroHLEFunctionName
{
// NOTE: members have no explicit values, so their numeric values follow declaration order.
/// <summary>No HLE function.</summary>
None,
/// <summary>Binds a shader program.</summary>
BindShaderProgram,
/// <summary>Clears one bound color target.</summary>
ClearColor,
/// <summary>Depth-stencil clear.</summary>
ClearDepthStencil,
/// <summary>Instanced non-indexed draw.</summary>
DrawArraysInstanced,
/// <summary>Indexed draw.</summary>
DrawElements,
/// <summary>Instanced indexed draw.</summary>
DrawElementsInstanced,
/// <summary>Indirect indexed draw.</summary>
DrawElementsIndirect,
/// <summary>Indirect indexed multi-draw with a count parameter.</summary>
MultiDrawElementsIndirectCount,
/// <summary>Updates blend enable state.</summary>
UpdateBlendState,
/// <summary>Updates color masks.</summary>
UpdateColorMasks,
/// <summary>Updates uniform buffer state for update or bind.</summary>
UpdateUniformBufferState,
/// <summary>Updates uniform buffer state for update.</summary>
UpdateUniformBufferStateCbu,
/// <summary>Updates uniform buffer state for update (second variant).</summary>
UpdateUniformBufferStateCbuV2
}
}

View file

@ -46,12 +46,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
// Known macros that have an HLE implementation. Each entry pairs the HLE function
// with a 128-bit hash and a third value (presumably the macro code length in bytes,
// which should be confirmed against the TableEntry definition).
private static readonly TableEntry[] _table =
{
    new(MacroHLEFunctionName.BindShaderProgram, new Hash128(0x5D5EFB912369F60B, 0x69131ED5019F08EF), 0x68),
    new(MacroHLEFunctionName.ClearColor, new Hash128(0xA9FB28D1DC43645A, 0xB177E5D2EAE67FB0), 0x28),
    new(MacroHLEFunctionName.ClearDepthStencil, new Hash128(0x1B96CB77D4879F4F, 0x8557032FE0C965FB), 0x24),
    new(MacroHLEFunctionName.DrawArraysInstanced, new Hash128(0x197FB416269DBC26, 0x34288C01DDA82202), 0x48),
    new(MacroHLEFunctionName.DrawElements, new Hash128(0x3D7F32AE6C2702A7, 0x9353C9F41C1A244D), 0x20),
    new(MacroHLEFunctionName.DrawElementsInstanced, new Hash128(0x1A501FD3D54EC8E0, 0x6CF570CF79DA74D6), 0x5C),
    new(MacroHLEFunctionName.DrawElementsIndirect, new Hash128(0x86A3E8E903AF8F45, 0xD35BBA07C23860A4), 0x7C),
    new(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C),
    new(MacroHLEFunctionName.UpdateBlendState, new Hash128(0x40F6D4E7B08D7640, 0x82167BEEAECB959F), 0x28),
    new(MacroHLEFunctionName.UpdateColorMasks, new Hash128(0x9EE32420B8441DFD, 0x6E7724759A57333E), 0x24),
    new(MacroHLEFunctionName.UpdateUniformBufferState, new Hash128(0x8EE66706049CB0B0, 0x51C1CF906EC86F7C), 0x20),
    new(MacroHLEFunctionName.UpdateUniformBufferStateCbu, new Hash128(0xA4592676A3E581A0, 0xA39E77FE19FE04AC), 0x18),
    new(MacroHLEFunctionName.UpdateUniformBufferStateCbuV2, new Hash128(0x392FA750489983D4, 0x35BACE455155D2C3), 0x18),
};
/// <summary>
@ -62,18 +69,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME
/// <returns>True if the host supports the HLE macro, false otherwise</returns>
private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name)
{
    // Every HLE macro is supported unconditionally, with two exceptions:
    //  - None is not a macro at all, so it is never supported;
    //  - MultiDrawElementsIndirectCount depends on the host's indirect parameters capability.
    // Deciding by exception (rather than listing each supported name) means newly added
    // HLE macros are enabled without having to touch this method.
    return name switch
    {
        MacroHLEFunctionName.None => false,
        MacroHLEFunctionName.MultiDrawElementsIndirectCount => caps.SupportsIndirectParameters,
        _ => true,
    };
}