riperiperi 9f1cf6458c
Vulkan: Migrate buffers between memory types to improve GPU performance (#4540)
* Initial implementation of migration between memory heaps

- Missing OOM handling
- Missing `_map` data safety when remapping
  - Copy may not have completed yet (needs some kind of fence)
  - Map may be unmapped before it is done being used. (needs scoped access)
- SSBO accesses are all "writes" - maybe pass info in another way.
- Missing keeping map type when resizing buffers (should this be done?)

* Ensure migrated data is in place before flushing.

* Fix issue where old waitable would be signalled.

- There is a real issue where existing Auto<> references need to be replaced.

* Swap bound Auto<> instances when swapping buffer backing

* Fix conversion buffers

* Don't try move buffers if the host has shared memory.

* Make GPU methods return PinnedSpan with scope

* Storage Hint

* Fix stupidity

* Fix rebase

* Tweak rules

Attempt to sidestep BOTW slowdown

* Remove line

* Migrate only when command buffers flush

* Change backing swap log to debug

* Address some feedback

* Disallow backing swap when the flush lock is held by the current thread

* Make PinnedSpan from ReadOnlySpan explicitly unsafe

* Fix some small issues

- Index buffer swap fixed
- Allocate DeviceLocal buffers using a separate block list to images.

* Remove alternative flags

* Address feedback
2023-03-19 17:56:48 -03:00

456 lines
17 KiB
C#

using Ryujinx.Graphics.GAL;
using Silk.NET.Vulkan;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using VkFormat = Silk.NET.Vulkan.Format;
using VkBuffer = Silk.NET.Vulkan.Buffer;
namespace Ryujinx.Graphics.Vulkan
{
class BufferManager : IDisposable
{
private const MemoryPropertyFlags DefaultBufferMemoryFlags =
MemoryPropertyFlags.HostVisibleBit |
MemoryPropertyFlags.HostCoherentBit |
MemoryPropertyFlags.HostCachedBit;
// Some drivers don't expose a "HostCached" memory type,
// so we need those alternative flags for the allocation to succeed there.
private const MemoryPropertyFlags DefaultBufferMemoryNoCacheFlags =
MemoryPropertyFlags.HostVisibleBit |
MemoryPropertyFlags.HostCoherentBit;
private const MemoryPropertyFlags DeviceLocalBufferMemoryFlags =
MemoryPropertyFlags.DeviceLocalBit;
private const MemoryPropertyFlags DeviceLocalMappedBufferMemoryFlags =
MemoryPropertyFlags.DeviceLocalBit |
MemoryPropertyFlags.HostVisibleBit |
MemoryPropertyFlags.HostCoherentBit;
private const BufferUsageFlags DefaultBufferUsageFlags =
BufferUsageFlags.TransferSrcBit |
BufferUsageFlags.TransferDstBit |
BufferUsageFlags.UniformTexelBufferBit |
BufferUsageFlags.StorageTexelBufferBit |
BufferUsageFlags.UniformBufferBit |
BufferUsageFlags.StorageBufferBit |
BufferUsageFlags.IndexBufferBit |
BufferUsageFlags.VertexBufferBit |
BufferUsageFlags.TransformFeedbackBufferBitExt;
private readonly Device _device;
private readonly IdList<BufferHolder> _buffers;
public int BufferCount { get; private set; }
public StagingBuffer StagingBuffer { get; }
public BufferManager(VulkanRenderer gd, Device device)
{
_device = device;
_buffers = new IdList<BufferHolder>();
StagingBuffer = new StagingBuffer(gd, this);
}
public BufferHandle CreateWithHandle(VulkanRenderer gd, int size, BufferAllocationType baseType = BufferAllocationType.HostMapped, BufferHandle storageHint = default)
{
return CreateWithHandle(gd, size, out _, baseType, storageHint);
}
public BufferHandle CreateWithHandle(VulkanRenderer gd, int size, out BufferHolder holder, BufferAllocationType baseType = BufferAllocationType.HostMapped, BufferHandle storageHint = default)
{
holder = Create(gd, size, baseType: baseType, storageHint: storageHint);
if (holder == null)
{
return BufferHandle.Null;
}
BufferCount++;
ulong handle64 = (uint)_buffers.Add(holder);
return Unsafe.As<ulong, BufferHandle>(ref handle64);
}
public unsafe (VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) CreateBacking(
VulkanRenderer gd,
int size,
BufferAllocationType type,
bool forConditionalRendering = false,
BufferAllocationType fallbackType = BufferAllocationType.Auto)
{
var usage = DefaultBufferUsageFlags;
if (forConditionalRendering && gd.Capabilities.SupportsConditionalRendering)
{
usage |= BufferUsageFlags.ConditionalRenderingBitExt;
}
else if (gd.Capabilities.SupportsIndirectParameters)
{
usage |= BufferUsageFlags.IndirectBufferBit;
}
var bufferCreateInfo = new BufferCreateInfo()
{
SType = StructureType.BufferCreateInfo,
Size = (ulong)size,
Usage = usage,
SharingMode = SharingMode.Exclusive
};
gd.Api.CreateBuffer(_device, in bufferCreateInfo, null, out var buffer).ThrowOnError();
gd.Api.GetBufferMemoryRequirements(_device, buffer, out var requirements);
MemoryAllocation allocation;
do
{
var allocateFlags = type switch
{
BufferAllocationType.HostMappedNoCache => DefaultBufferMemoryNoCacheFlags,
BufferAllocationType.HostMapped => DefaultBufferMemoryFlags,
BufferAllocationType.DeviceLocal => DeviceLocalBufferMemoryFlags,
BufferAllocationType.DeviceLocalMapped => DeviceLocalMappedBufferMemoryFlags,
_ => DefaultBufferMemoryFlags
};
// If an allocation with this memory type fails, fall back to the previous one.
try
{
allocation = gd.MemoryAllocator.AllocateDeviceMemory(requirements, allocateFlags, true);
}
catch (VulkanException)
{
allocation = default;
}
}
while (allocation.Memory.Handle == 0 && (--type != fallbackType));
if (allocation.Memory.Handle == 0UL)
{
gd.Api.DestroyBuffer(_device, buffer, null);
return default;
}
gd.Api.BindBufferMemory(_device, buffer, allocation.Memory, allocation.Offset);
return (buffer, allocation, type);
}
public unsafe BufferHolder Create(
VulkanRenderer gd,
int size,
bool forConditionalRendering = false,
BufferAllocationType baseType = BufferAllocationType.HostMapped,
BufferHandle storageHint = default)
{
BufferAllocationType type = baseType;
BufferHolder storageHintHolder = null;
if (baseType == BufferAllocationType.Auto)
{
if (gd.IsSharedMemory)
{
baseType = BufferAllocationType.HostMapped;
type = baseType;
}
else
{
type = size >= BufferHolder.DeviceLocalSizeThreshold ? BufferAllocationType.DeviceLocal : BufferAllocationType.HostMapped;
}
if (storageHint != BufferHandle.Null)
{
if (TryGetBuffer(storageHint, out storageHintHolder))
{
type = storageHintHolder.DesiredType;
}
}
}
(VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) =
CreateBacking(gd, size, type, forConditionalRendering);
if (buffer.Handle != 0)
{
var holder = new BufferHolder(gd, _device, buffer, allocation, size, baseType, resultType);
if (storageHintHolder != null)
{
holder.InheritMetrics(storageHintHolder);
}
return holder;
}
return null;
}
public Auto<DisposableBufferView> CreateView(BufferHandle handle, VkFormat format, int offset, int size, Action invalidateView)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.CreateView(format, offset, size, invalidateView);
}
return null;
}
public Auto<DisposableBuffer> GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, bool isWrite, bool isSSBO = false)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBuffer(commandBuffer, isWrite, isSSBO);
}
return null;
}
public Auto<DisposableBuffer> GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, int offset, int size, bool isWrite)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBuffer(commandBuffer, offset, size, isWrite);
}
return null;
}
public Auto<DisposableBuffer> GetBufferI8ToI16(CommandBufferScoped cbs, BufferHandle handle, int offset, int size)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBufferI8ToI16(cbs, offset, size);
}
return null;
}
public Auto<DisposableBuffer> GetAlignedVertexBuffer(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, int stride, int alignment)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetAlignedVertexBuffer(cbs, offset, size, stride, alignment);
}
return null;
}
public Auto<DisposableBuffer> GetBufferTopologyConversion(CommandBufferScoped cbs, BufferHandle handle, int offset, int size, IndexBufferPattern pattern, int indexSize)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetBufferTopologyConversion(cbs, offset, size, pattern, indexSize);
}
return null;
}
public (Auto<DisposableBuffer>, Auto<DisposableBuffer>) GetBufferTopologyConversionIndirect(
VulkanRenderer gd,
CommandBufferScoped cbs,
BufferRange indexBuffer,
BufferRange indirectBuffer,
BufferRange drawCountBuffer,
IndexBufferPattern pattern,
int indexSize,
bool hasDrawCount,
int maxDrawCount,
int indirectDataStride)
{
BufferHolder drawCountBufferHolder = null;
if (!TryGetBuffer(indexBuffer.Handle, out var indexBufferHolder) ||
!TryGetBuffer(indirectBuffer.Handle, out var indirectBufferHolder) ||
(hasDrawCount && !TryGetBuffer(drawCountBuffer.Handle, out drawCountBufferHolder)))
{
return (null, null);
}
var indexBufferKey = new TopologyConversionIndirectCacheKey(
gd,
pattern,
indexSize,
indirectBufferHolder,
indirectBuffer.Offset,
indirectBuffer.Size);
bool hasConvertedIndexBuffer = indexBufferHolder.TryGetCachedConvertedBuffer(
indexBuffer.Offset,
indexBuffer.Size,
indexBufferKey,
out var convertedIndexBuffer);
var indirectBufferKey = new IndirectDataCacheKey(pattern);
bool hasConvertedIndirectBuffer = indirectBufferHolder.TryGetCachedConvertedBuffer(
indirectBuffer.Offset,
indirectBuffer.Size,
indirectBufferKey,
out var convertedIndirectBuffer);
var drawCountBufferKey = new DrawCountCacheKey();
bool hasCachedDrawCount = true;
if (hasDrawCount)
{
hasCachedDrawCount = drawCountBufferHolder.TryGetCachedConvertedBuffer(
drawCountBuffer.Offset,
drawCountBuffer.Size,
drawCountBufferKey,
out _);
}
if (!hasConvertedIndexBuffer || !hasConvertedIndirectBuffer || !hasCachedDrawCount)
{
// The destination index size is always I32.
int indexCount = indexBuffer.Size / indexSize;
int convertedCount = pattern.GetConvertedCount(indexCount);
if (!hasConvertedIndexBuffer)
{
convertedIndexBuffer = Create(gd, convertedCount * 4);
indexBufferKey.SetBuffer(convertedIndexBuffer.GetBuffer());
indexBufferHolder.AddCachedConvertedBuffer(indexBuffer.Offset, indexBuffer.Size, indexBufferKey, convertedIndexBuffer);
}
if (!hasConvertedIndirectBuffer)
{
convertedIndirectBuffer = Create(gd, indirectBuffer.Size);
indirectBufferHolder.AddCachedConvertedBuffer(indirectBuffer.Offset, indirectBuffer.Size, indirectBufferKey, convertedIndirectBuffer);
}
gd.PipelineInternal.EndRenderPass();
gd.HelperShader.ConvertIndexBufferIndirect(
gd,
cbs,
indirectBufferHolder,
convertedIndirectBuffer,
drawCountBuffer,
indexBufferHolder,
convertedIndexBuffer,
pattern,
indexSize,
indexBuffer.Offset,
indexBuffer.Size,
indirectBuffer.Offset,
hasDrawCount,
maxDrawCount,
indirectDataStride);
// Any modification of the indirect buffer should invalidate the index buffers that are associated with it,
// since we used the indirect data to find the range of the index buffer that is used.
var indexBufferDependency = new Dependency(
indexBufferHolder,
indexBuffer.Offset,
indexBuffer.Size,
indexBufferKey);
indirectBufferHolder.AddCachedConvertedBufferDependency(
indirectBuffer.Offset,
indirectBuffer.Size,
indirectBufferKey,
indexBufferDependency);
if (hasDrawCount)
{
if (!hasCachedDrawCount)
{
drawCountBufferHolder.AddCachedConvertedBuffer(drawCountBuffer.Offset, drawCountBuffer.Size, drawCountBufferKey, null);
}
// If we have a draw count, any modification of the draw count should invalidate all indirect buffers
// where we used it to find the range of indirect data that is actually used.
var indirectBufferDependency = new Dependency(
indirectBufferHolder,
indirectBuffer.Offset,
indirectBuffer.Size,
indirectBufferKey);
drawCountBufferHolder.AddCachedConvertedBufferDependency(
drawCountBuffer.Offset,
drawCountBuffer.Size,
drawCountBufferKey,
indirectBufferDependency);
}
}
return (convertedIndexBuffer.GetBuffer(), convertedIndirectBuffer.GetBuffer());
}
public Auto<DisposableBuffer> GetBuffer(CommandBuffer commandBuffer, BufferHandle handle, bool isWrite, out int size)
{
if (TryGetBuffer(handle, out var holder))
{
size = holder.Size;
return holder.GetBuffer(commandBuffer, isWrite);
}
size = 0;
return null;
}
public PinnedSpan<byte> GetData(BufferHandle handle, int offset, int size)
{
if (TryGetBuffer(handle, out var holder))
{
return holder.GetData(offset, size);
}
return new PinnedSpan<byte>();
}
public void SetData<T>(BufferHandle handle, int offset, ReadOnlySpan<T> data) where T : unmanaged
{
SetData(handle, offset, MemoryMarshal.Cast<T, byte>(data), null, null);
}
public void SetData(BufferHandle handle, int offset, ReadOnlySpan<byte> data, CommandBufferScoped? cbs, Action endRenderPass)
{
if (TryGetBuffer(handle, out var holder))
{
holder.SetData(offset, data, cbs, endRenderPass);
}
}
public void Delete(BufferHandle handle)
{
if (TryGetBuffer(handle, out var holder))
{
holder.Dispose();
_buffers.Remove((int)Unsafe.As<BufferHandle, ulong>(ref handle));
}
}
private bool TryGetBuffer(BufferHandle handle, out BufferHolder holder)
{
return _buffers.TryGetValue((int)Unsafe.As<BufferHandle, ulong>(ref handle), out holder);
}
protected virtual void Dispose(bool disposing)
{
if (disposing)
{
foreach (BufferHolder buffer in _buffers)
{
buffer.Dispose();
}
_buffers.Clear();
StagingBuffer.Dispose();
}
}
public void Dispose()
{
Dispose(true);
}
}
}