Shader: Bias textureGather instructions on AMD/Intel (#4703)

* Experimental (GLSL, forced)
* SPIR-V attempt
* Add capability
* Fix pCount == 1 on glsl
* Fix typo
2025-02-21 21:10:45 +01:00 · 2023-04-22 22:02:39 +01:00 · 2023-04-22 22:02:39 +01:00 · 8d9d508dc7
commit 8d9d508dc7
parent e27f5522e2
9 changed files with 84 additions and 9 deletions
--- a/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -48,6 +48,8 @@ namespace Ryujinx.Graphics.GAL
        public readonly float MaximumSupportedAnisotropy;
        public readonly int StorageBufferOffsetAlignment;

+        public readonly int GatherBiasPrecision;
+
        public Capabilities(
            TargetApi api,
            string vendorName,
@@ -87,7 +89,8 @@ namespace Ryujinx.Graphics.GAL
            uint maximumImagesPerStage,
            int maximumComputeSharedMemorySize,
            float maximumSupportedAnisotropy,
-            int storageBufferOffsetAlignment)
+            int storageBufferOffsetAlignment,
+            int gatherBiasPrecision)
        {
            Api = api;
            VendorName = vendorName;
@@ -128,6 +131,7 @@ namespace Ryujinx.Graphics.GAL
            MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize;
            MaximumSupportedAnisotropy = maximumSupportedAnisotropy;
            StorageBufferOffsetAlignment = storageBufferOffsetAlignment;
+            GatherBiasPrecision = gatherBiasPrecision;
        }
    }
 }
--- a/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs
@@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
        private const ushort FileFormatVersionMajor = 1;
        private const ushort FileFormatVersionMinor = 2;
        private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
-        private const uint CodeGenVersion = 4404;
+        private const uint CodeGenVersion = 4703;

        private const string SharedTocFileName = "shared.toc";
        private const string SharedDataFileName = "shared.data";
--- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs
@@ -112,6 +112,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
            };
        }

+        public int QueryHostGatherBiasPrecision() => _context.Capabilities.GatherBiasPrecision;
+
        public bool QueryHostReducedPrecision() => _context.Capabilities.ReduceShaderPrecision;

        public bool QueryHostHasFrontFacingBug() => _context.Capabilities.HasFrontFacingBug;
--- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -103,11 +103,14 @@ namespace Ryujinx.Graphics.OpenGL

        public Capabilities GetCapabilities()
        {
+            bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
+            bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
+
            return new Capabilities(
                api: TargetApi.OpenGL,
                vendorName: GpuVendor,
-                hasFrontFacingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows,
-                hasVectorIndexingBug: HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows,
+                hasFrontFacingBug: intelWindows,
+                hasVectorIndexingBug: amdWindows,
                needsFragmentOutputSpecialization: false,
                reduceShaderPrecision: false,
                supportsAstcCompression: HwCapabilities.SupportsAstcCompression,
@@ -142,7 +145,8 @@ namespace Ryujinx.Graphics.OpenGL
                maximumImagesPerStage: 8,
                maximumComputeSharedMemorySize: HwCapabilities.MaximumComputeSharedMemorySize,
                maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy,
-                storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment);
+                storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment,
+                gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan.
        }

        public void SetBufferData(BufferHandle buffer, int offset, ReadOnlySpan<byte> data)
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
@@ -677,7 +677,28 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
                return vector;
            }

-            Append(ApplyScaling(AssemblePVector(pCount)));
+            string ApplyBias(string vector)
+            {
+                int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
+                if (isGather && gatherBiasPrecision != 0)
+                {
+                    // The host GPU needs the gather coordinates nudged slightly to match NVIDIA behaviour when the sample point lies exactly between two texels.
+                    // The offset is half of one precision step, 1 / (textureSize * 2^(gatherBiasPrecision + 1)), which corrects for the rounding difference.
+
+                    if (pCount == 1)
+                    {
+                        vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))";
+                    }
+                    else
+                    {
+                        vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))";
+                    }
+                }
+
+                return vector;
+            }
+
+            Append(ApplyBias(ApplyScaling(AssemblePVector(pCount))));

            string AssembleDerivativesVector(int count)
            {
--- a/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -4,6 +4,7 @@ using Ryujinx.Graphics.Shader.Translation;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Linq;
 using System.Numerics;
 using static Spv.Specification;

@@ -1556,6 +1557,33 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                }
            }

+            SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image)
+            {
+                int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
+                if (isGather && gatherBiasPrecision != 0)
+                {
+                    // The host GPU needs the gather coordinates nudged slightly to match NVIDIA behaviour when the sample point lies exactly between two texels.
+                    // The offset is half of one precision step, 1 / (imageSize * 2^(gatherBiasPrecision + 1)), which corrects for the rounding difference.
+                    var sizeType = pCount == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount);
+                    var pVectorType = pCount == 1 ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount);
+
+                    var bias = context.Constant(context.TypeFP32(), (float)(1 << (gatherBiasPrecision + 1)));
+                    var biasVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(bias, pCount).ToArray());
+
+                    var one = context.Constant(context.TypeFP32(), 1f);
+                    var oneVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(one, pCount).ToArray());
+
+                    var divisor = context.FMul(
+                        pVectorType,
+                        context.ConvertSToF(pVectorType, context.ImageQuerySize(sizeType, image)),
+                        biasVector);
+
+                    vector = context.FAdd(pVectorType, vector, context.FDiv(pVectorType, oneVector, divisor));
+                }
+
+                return vector;
+            }
+
            SpvInstruction pCoords = AssemblePVector(pCount);
            pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount);

@@ -1716,6 +1744,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
                image = context.Image(imageType, image);
            }

+            pCoords = ApplyBias(pCoords, image);
+
            var operands = operandsList.ToArray();

            SpvInstruction result;
--- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -196,6 +196,15 @@ namespace Ryujinx.Graphics.Shader
            return false;
        }

+        /// <summary>
+        /// Queries the number of precision bits of the host's gather operation, used to bias gather coordinates. Zero means no bias.
+        /// </summary>
+        /// <returns>Bits of gather operation precision to use for coordinate bias</returns>
+        int QueryHostGatherBiasPrecision()
+        {
+            return 0;
+        }
+
        /// <summary>
        /// Queries host about whether to reduce precision to improve performance.
        /// </summary>
--- a/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
+++ b/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
@@ -46,6 +46,7 @@ namespace Ryujinx.Graphics.Vulkan
        public readonly SampleCountFlags SupportedSampleCounts;
        public readonly PortabilitySubsetFlags PortabilitySubset;
        public readonly uint VertexBufferAlignment;
+        public readonly uint SubTexelPrecisionBits;

        public HardwareCapabilities(
            bool supportsIndexTypeUint8,
@@ -77,7 +78,8 @@ namespace Ryujinx.Graphics.Vulkan
            ShaderStageFlags requiredSubgroupSizeStages,
            SampleCountFlags supportedSampleCounts,
            PortabilitySubsetFlags portabilitySubset,
-            uint vertexBufferAlignment)
+            uint vertexBufferAlignment,
+            uint subTexelPrecisionBits)
        {
            SupportsIndexTypeUint8 = supportsIndexTypeUint8;
            SupportsCustomBorderColor = supportsCustomBorderColor;
@@ -109,6 +111,7 @@ namespace Ryujinx.Graphics.Vulkan
            SupportedSampleCounts = supportedSampleCounts;
            PortabilitySubset = portabilitySubset;
            VertexBufferAlignment = vertexBufferAlignment;
+            SubTexelPrecisionBits = subTexelPrecisionBits;
        }
    }
 }
--- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -311,7 +311,8 @@ namespace Ryujinx.Graphics.Vulkan
                propertiesSubgroupSizeControl.RequiredSubgroupSizeStages,
                supportedSampleCounts,
                portabilityFlags,
-                vertexBufferAlignment);
+                vertexBufferAlignment,
+                properties.Limits.SubTexelPrecisionBits);

            IsSharedMemory = MemoryAllocator.IsDeviceMemoryShared(_physicalDevice);

@@ -576,7 +577,8 @@ namespace Ryujinx.Graphics.Vulkan
                maximumImagesPerStage: Constants.MaxImagesPerStage,
                maximumComputeSharedMemorySize: (int)limits.MaxComputeSharedMemorySize,
                maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy,
-                storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment);
+                storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment,
+                gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? (int)Capabilities.SubTexelPrecisionBits : 0);
        }

        public HardwareInfo GetHardwareInfo()