GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings (#6794)

* GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings

Essentially retreading #4540, but it's on the GPU project now instead of the backend. This allows us to have a lot more control + knowledge of where the buffer backing has been changed and allows us to pre-emptively flush pages to host memory for quicker readback. It will allow us to do other stuff in the future, but we'll get there when we get there.

Performance greatly improved in Hyrule Warriors: Age of Calamity. Performance notably improved in TOTK (average). Performance for BOTW restored to how it was before #4911, perhaps a bit better.

- Rewrites a bunch of buffer migration stuff. Might want to tighten up how dispose stuff works.
- Fixed an issue where the copy for texture pre-flush would happen _after_ the syncpoint.

TODO: remove a page from pre-flush if it isn't flushed after a certain number of copies.

* Add copy deactivation

* Fix dependent virtual buffers

* Remove logging

* Fix format issues (maybe)

* Vulkan: Remove backing swap

* Add explicit memory access types for most buffers

* Fix typo

* Add device local force expiry, change buffer inheritance behaviour

* General cleanup, OGL fix

* BufferPreFlush comments

* BufferBackingState comments

* Add an extra precaution to BufferMigration

This is very unlikely, but it's important to cover loose ends like this.

* Address some feedback

* Docs
This commit is contained in:
riperiperi 2024-05-19 20:53:37 +01:00 committed by GitHub
parent 2f427deb67
commit eb1ce41b00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 1342 additions and 523 deletions

View file

@ -6,8 +6,13 @@ namespace Ryujinx.Graphics.GAL
public enum BufferAccess
{
Default = 0,
FlushPersistent = 1 << 0,
Stream = 1 << 1,
SparseCompatible = 1 << 2,
HostMemory = 1,
DeviceMemory = 2,
DeviceMemoryMapped = 3,
MemoryTypeMask = 0xf,
Stream = 1 << 4,
SparseCompatible = 1 << 5,
}
}

View file

@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.GAL
{
public readonly TargetApi Api;
public readonly string VendorName;
public readonly SystemMemoryType MemoryType;
public readonly bool HasFrontFacingBug;
public readonly bool HasVectorIndexingBug;
@ -66,6 +67,7 @@ namespace Ryujinx.Graphics.GAL
public Capabilities(
TargetApi api,
string vendorName,
SystemMemoryType memoryType,
bool hasFrontFacingBug,
bool hasVectorIndexingBug,
bool needsFragmentOutputSpecialization,
@ -120,6 +122,7 @@ namespace Ryujinx.Graphics.GAL
{
Api = api;
VendorName = vendorName;
MemoryType = memoryType;
HasFrontFacingBug = hasFrontFacingBug;
HasVectorIndexingBug = hasVectorIndexingBug;
NeedsFragmentOutputSpecialization = needsFragmentOutputSpecialization;

View file

@ -17,7 +17,6 @@ namespace Ryujinx.Graphics.GAL
void BackgroundContextAction(Action action, bool alwaysBackground = false);
BufferHandle CreateBuffer(int size, BufferAccess access = BufferAccess.Default);
BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint);
BufferHandle CreateBuffer(nint pointer, int size);
BufferHandle CreateBufferSparse(ReadOnlySpan<BufferRange> storageBuffers);

View file

@ -44,7 +44,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading
}
Register<ActionCommand>(CommandType.Action);
Register<CreateBufferCommand>(CommandType.CreateBuffer);
Register<CreateBufferAccessCommand>(CommandType.CreateBufferAccess);
Register<CreateBufferSparseCommand>(CommandType.CreateBufferSparse);
Register<CreateHostBufferCommand>(CommandType.CreateHostBuffer);

View file

@ -3,7 +3,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading
enum CommandType : byte
{
Action,
CreateBuffer,
CreateBufferAccess,
CreateBufferSparse,
CreateHostBuffer,

View file

@ -1,31 +0,0 @@
namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer
{
struct CreateBufferCommand : IGALCommand, IGALCommand<CreateBufferCommand>
{
public readonly CommandType CommandType => CommandType.CreateBuffer;
private BufferHandle _threadedHandle;
private int _size;
private BufferAccess _access;
private BufferHandle _storageHint;
public void Set(BufferHandle threadedHandle, int size, BufferAccess access, BufferHandle storageHint)
{
_threadedHandle = threadedHandle;
_size = size;
_access = access;
_storageHint = storageHint;
}
public static void Run(ref CreateBufferCommand command, ThreadedRenderer threaded, IRenderer renderer)
{
BufferHandle hint = BufferHandle.Null;
if (command._storageHint != BufferHandle.Null)
{
hint = threaded.Buffers.MapBuffer(command._storageHint);
}
threaded.Buffers.AssignBuffer(command._threadedHandle, renderer.CreateBuffer(command._size, command._access, hint));
}
}
}

View file

@ -272,15 +272,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading
return handle;
}
public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint)
{
BufferHandle handle = Buffers.CreateBufferHandle();
New<CreateBufferCommand>().Set(handle, size, access, storageHint);
QueueCommand();
return handle;
}
public BufferHandle CreateBuffer(nint pointer, int size)
{
BufferHandle handle = Buffers.CreateBufferHandle();

View file

@ -0,0 +1,29 @@
namespace Ryujinx.Graphics.GAL
{
public enum SystemMemoryType
{
/// <summary>
/// The backend manages the ownership of memory. This mode never supports host imported memory.
/// </summary>
BackendManaged,
/// <summary>
/// Device memory has similar performance to host memory, usually because it's shared between CPU/GPU.
/// Use host memory whenever possible.
/// </summary>
UnifiedMemory,
/// <summary>
/// GPU storage to host memory goes though a slow interconnect, but it would still be preferable to use it if the data is flushed back often.
/// Assumes constant buffer access to host memory is rather fast.
/// </summary>
DedicatedMemory,
/// <summary>
/// GPU storage to host memory goes though a slow interconnect, that is very slow when doing access from storage.
/// When frequently accessed, copy buffers to host memory using DMA.
/// Assumes constant buffer access to host memory is rather fast.
/// </summary>
DedicatedMemorySlowStorage
}
}