From d7c71c8a7be330b3f976f638e3f69fffdf34a6e2 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Tue, 13 Aug 2024 17:03:19 +0100 Subject: [PATCH] D32FS8 to D24S8 Conversion --- src/Ryujinx.Graphics.Metal/FormatConverter.cs | 49 ++++++++++++ src/Ryujinx.Graphics.Metal/FormatTable.cs | 10 ++- src/Ryujinx.Graphics.Metal/HelperShader.cs | 45 +++++++++++ .../Ryujinx.Graphics.Metal.csproj | 1 + .../Shaders/ConvertD32S8ToD24S8.metal | 66 ++++++++++++++++ src/Ryujinx.Graphics.Metal/Texture.cs | 76 ++++++++++++++++--- 6 files changed, 233 insertions(+), 14 deletions(-) create mode 100644 src/Ryujinx.Graphics.Metal/FormatConverter.cs create mode 100644 src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal diff --git a/src/Ryujinx.Graphics.Metal/FormatConverter.cs b/src/Ryujinx.Graphics.Metal/FormatConverter.cs new file mode 100644 index 000000000..e099187b8 --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/FormatConverter.cs @@ -0,0 +1,49 @@ +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Metal +{ + class FormatConverter + { + public static void ConvertD24S8ToD32FS8(Span output, ReadOnlySpan input) + { + const float UnormToFloat = 1f / 0xffffff; + + Span outputUint = MemoryMarshal.Cast(output); + ReadOnlySpan inputUint = MemoryMarshal.Cast(input); + + int i = 0; + + for (; i < inputUint.Length; i++) + { + uint depthStencil = inputUint[i]; + uint depth = depthStencil >> 8; + uint stencil = depthStencil & 0xff; + + int j = i * 2; + + outputUint[j] = (uint)BitConverter.SingleToInt32Bits(depth * UnormToFloat); + outputUint[j + 1] = stencil; + } + } + + public static void ConvertD32FS8ToD24S8(Span output, ReadOnlySpan input) + { + Span outputUint = MemoryMarshal.Cast(output); + ReadOnlySpan inputUint = MemoryMarshal.Cast(input); + + int i = 0; + + for (; i < inputUint.Length; i += 2) + { + float depth = BitConverter.Int32BitsToSingle((int)inputUint[i]); + uint stencil = inputUint[i + 1]; + uint depthStencil = (Math.Clamp((uint)(depth * 0xffffff), 0, 0xffffff) << 8) | (stencil & 0xff); + + int j = i >> 1; + + outputUint[j] = depthStencil; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Metal/FormatTable.cs b/src/Ryujinx.Graphics.Metal/FormatTable.cs index f6261488b..c1f8923f9 100644 --- a/src/Ryujinx.Graphics.Metal/FormatTable.cs +++ b/src/Ryujinx.Graphics.Metal/FormatTable.cs @@ -172,21 +172,25 @@ namespace Ryujinx.Graphics.Metal { var mtlFormat = _table[(int)format]; - if (mtlFormat == MTLPixelFormat.Depth24UnormStencil8) + if (IsD24S8(format)) { if (!MTLDevice.CreateSystemDefaultDevice().Depth24Stencil8PixelFormatSupported) { - Logger.Error?.PrintMsg(LogClass.Gpu, "Application requested Depth24Stencil8, which is unsupported on this device!"); mtlFormat = MTLPixelFormat.Depth32FloatStencil8; } } if (mtlFormat == MTLPixelFormat.Invalid) { - Logger.Error?.PrintMsg(LogClass.Gpu, $"Application requested {format}, no direct equivalent was found!"); + Logger.Error?.PrintMsg(LogClass.Gpu, $"Format {format} is not supported by the host."); } return mtlFormat; } + + public static bool IsD24S8(Format format) + { + return format == Format.D24UnormS8Uint || format == Format.S8UintD24Unorm || format == Format.X8UintD24Unorm; + } } } diff --git a/src/Ryujinx.Graphics.Metal/HelperShader.cs b/src/Ryujinx.Graphics.Metal/HelperShader.cs index d1a4b4705..8039641ea 100644 --- a/src/Ryujinx.Graphics.Metal/HelperShader.cs +++ b/src/Ryujinx.Graphics.Metal/HelperShader.cs @@ -32,6 +32,7 @@ namespace Ryujinx.Graphics.Metal private readonly List _programsColorClearU = new(); private readonly IProgram _programDepthStencilClear; private readonly IProgram _programStrideChange; + private readonly IProgram _programConvertD32S8ToD24S8; private readonly IProgram _programDepthBlit; private readonly IProgram _programDepthBlitMs; private readonly IProgram _programStencilBlit; @@ -151,6 +152,17 @@ namespace Ryujinx.Graphics.Metal new ShaderSource(strideChangeSource, ShaderStage.Compute, TargetLanguage.Msl) ], strideChangeResourceLayout, device, new ComputeSize(64, 1, 1)); + var convertD32S8ToD24S8ResourceLayout = new ResourceLayoutBuilder() + .Add(ResourceStages.Compute, ResourceType.UniformBuffer, 0) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 1) + .Add(ResourceStages.Compute, ResourceType.StorageBuffer, 2, true).Build(); + + var convertD32S8ToD24S8Source = ReadMsl("ConvertD32S8ToD24S8.metal"); + _programConvertD32S8ToD24S8 = new Program( + [ + new ShaderSource(convertD32S8ToD24S8Source, ShaderStage.Compute, TargetLanguage.Msl) + ], convertD32S8ToD24S8ResourceLayout, device, new ComputeSize(64, 1, 1)); + var depthBlitSource = ReadMsl("DepthBlit.metal"); _programDepthBlit = new Program( [ @@ -591,6 +603,39 @@ namespace Ryujinx.Graphics.Metal _pipeline.SwapState(null); } + public unsafe void ConvertD32S8ToD24S8(CommandBufferScoped cbs, BufferHolder src, Auto dstBuffer, int pixelCount, int dstOffset) + { + int inSize = pixelCount * 2 * sizeof(int); + + var srcBuffer = src.GetBuffer(); + + const int ParamsBufferSize = sizeof(int) * 2; + + // Save current state + _pipeline.SwapState(_helperShaderState); + + Span shaderParams = stackalloc int[2]; + + shaderParams[0] = pixelCount; + shaderParams[1] = dstOffset; + + using var buffer = _renderer.BufferManager.ReserveOrCreate(cbs, ParamsBufferSize); + buffer.Holder.SetDataUnchecked(buffer.Offset, shaderParams); + _pipeline.SetUniformBuffers([new BufferAssignment(0, buffer.Range)]); + + Span> sbRanges = new Auto[2]; + + sbRanges[0] = srcBuffer; + sbRanges[1] = dstBuffer; + _pipeline.SetStorageBuffers(1, sbRanges); + + _pipeline.SetProgram(_programConvertD32S8ToD24S8); + _pipeline.DispatchCompute(1 + inSize / ConvertElementsPerWorkgroup, 1, 1, "D32S8 to D24S8 Conversion"); + + // Restore previous state + _pipeline.SwapState(null); + } + public unsafe void ClearColor( int index, ReadOnlySpan clearColor, diff --git a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj index 8cbdc8bcb..0839c426a 100644 --- a/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj +++ b/src/Ryujinx.Graphics.Metal/Ryujinx.Graphics.Metal.csproj @@ -18,6 +18,7 @@ + diff --git a/src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal b/src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal new file mode 100644 index 000000000..789527cbb --- /dev/null +++ b/src/Ryujinx.Graphics.Metal/Shaders/ConvertD32S8ToD24S8.metal @@ -0,0 +1,66 @@ +#include + +using namespace metal; + +struct StrideArguments { + int pixelCount; + int dstStartOffset; +}; + +struct InData { + uint data[1]; +}; + +struct OutData { + uint data[1]; +}; + +struct ConstantBuffers { + constant StrideArguments* stride_arguments; +}; + +struct StorageBuffers { + device InData* in_data; + device OutData* out_data; +}; + +kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(CONSTANT_BUFFERS_INDEX)]], + device StorageBuffers &storage_buffers [[buffer(STORAGE_BUFFERS_INDEX)]], + uint3 thread_position_in_grid [[thread_position_in_grid]], + uint3 threads_per_threadgroup [[threads_per_threadgroup]], + uint3 threadgroups_per_grid [[threads_per_grid]]) +{ + // Determine what slice of the stride copies this invocation will perform. + int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x); + + int copiesRequired = constant_buffers.stride_arguments->pixelCount; + + // Find the copies that this invocation should perform. + + // - Copies that all invocations perform. + int allInvocationCopies = copiesRequired / invocations; + + // - Extra remainder copy that this invocation performs. + int index = int(thread_position_in_grid.x); + int extra = (index < (copiesRequired % invocations)) ? 1 : 0; + + int copyCount = allInvocationCopies + extra; + + // Finally, get the starting offset. Make sure to count extra copies. + + int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index); + + int srcOffset = startCopy * 2; + int dstOffset = constant_buffers.stride_arguments->dstStartOffset + startCopy; + + // Perform the conversion for this region. + for (int i = 0; i < copyCount; i++) + { + float depth = as_type(storage_buffers.in_data->data[srcOffset++]); + uint stencil = storage_buffers.in_data->data[srcOffset++]; + + uint rescaledDepth = uint(clamp(depth, 0.0, 1.0) * 16777215.0); + + storage_buffers.out_data->data[dstOffset++] = (rescaledDepth << 8) | (stencil & 0xff); + } +} diff --git a/src/Ryujinx.Graphics.Metal/Texture.cs b/src/Ryujinx.Graphics.Metal/Texture.cs index 82e38def1..bf13aea7f 100644 --- a/src/Ryujinx.Graphics.Metal/Texture.cs +++ b/src/Ryujinx.Graphics.Metal/Texture.cs @@ -277,9 +277,18 @@ namespace Ryujinx.Graphics.Metal var autoBuffer = Renderer.BufferManager.GetBuffer(range.Handle, true); var mtlBuffer = autoBuffer.Get(cbs, range.Offset, outSize).Value; - // TODO: D32S8 conversion via temp copy holder + if (PrepareOutputBuffer(cbs, hostSize, mtlBuffer, out MTLBuffer copyToBuffer, out BufferHolder tempCopyHolder)) + { + offset = 0; + } - CopyFromOrToBuffer(cbs, mtlBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset: offset, stride: stride); + CopyFromOrToBuffer(cbs, copyToBuffer, MtlTexture, hostSize, true, layer, level, 1, 1, singleSlice: true, offset, stride); + + if (tempCopyHolder != null) + { + CopyDataToOutputBuffer(cbs, tempCopyHolder, autoBuffer, hostSize, range.Offset); + tempCopyHolder.Dispose(); + } } public ITexture CreateView(TextureCreateInfo info, int firstLayer, int firstLevel) @@ -287,27 +296,62 @@ namespace Ryujinx.Graphics.Metal return new Texture(Device, Renderer, Pipeline, info, _identitySwizzleHandle, firstLayer, firstLevel); } - private int GetBufferDataLength(int size) - { - // TODO: D32S8 conversion - - return size; - } - private void CopyDataToBuffer(Span storage, ReadOnlySpan input) { - // TODO: D32S8 conversion + if (NeedsD24S8Conversion()) + { + FormatConverter.ConvertD24S8ToD32FS8(storage, input); + return; + } input.CopyTo(storage); } private ReadOnlySpan GetDataFromBuffer(ReadOnlySpan storage, int size, Span output) { - // TODO: D32S8 conversion + if (NeedsD24S8Conversion()) + { + if (output.IsEmpty) + { + output = new byte[GetBufferDataLength(size)]; + } + + FormatConverter.ConvertD32FS8ToD24S8(output, storage); + return output; + } return storage; } + private bool PrepareOutputBuffer(CommandBufferScoped cbs, int hostSize, MTLBuffer target, out MTLBuffer copyTarget, out BufferHolder copyTargetHolder) + { + if (NeedsD24S8Conversion()) + { + copyTargetHolder = Renderer.BufferManager.Create(hostSize); + copyTarget = copyTargetHolder.GetBuffer().Get(cbs, 0, hostSize).Value; + + return true; + } + + copyTarget = target; + copyTargetHolder = null; + + return false; + } + + private void CopyDataToOutputBuffer(CommandBufferScoped cbs, BufferHolder hostData, Auto copyTarget, int hostSize, int dstOffset) + { + if (NeedsD24S8Conversion()) + { + Renderer.HelperShader.ConvertD32S8ToD24S8(cbs, hostData, copyTarget, hostSize / (2 * sizeof(int)), dstOffset); + } + } + + private bool NeedsD24S8Conversion() + { + return FormatTable.IsD24S8(Info.Format) && MtlFormat == MTLPixelFormat.Depth32FloatStencil8; + } + public void CopyFromOrToBuffer( CommandBufferScoped cbs, MTLBuffer buffer, @@ -564,6 +608,16 @@ namespace Ryujinx.Graphics.Metal buffer.Dispose(); } + private int GetBufferDataLength(int length) + { + if (NeedsD24S8Conversion()) + { + return length * 2; + } + + return length; + } + public void SetStorage(BufferRange buffer) { throw new NotImplementedException();