shadps4-emu · LNDF · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 13, 2024
diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp
@@ -1619,4 +1619,9 @@ void IREmitter::EmitPrimitive() {
     Inst(Opcode::EmitPrimitive);
 }
 
+// Prepends a new instruction built from `inst` at the insertion point and returns its address.
+IR::Inst* IREmitter::CopyInst(const IR::Inst& inst) {
+    return &*block->PrependNewInst(insertion_point, inst);
+}
+
 } // namespace Shader::IR
diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h
@@ -321,6 +321,8 @@ class IREmitter {
     void EmitVertex();
     void EmitPrimitive();
 
+    [[nodiscard]] Inst* CopyInst(const Inst& inst); // Emits a copy of inst at the insertion point
+
 private:
     IR::Block::iterator insertion_point;
 

diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -8,6 +8,7 @@
 #include "shader_recompiler/ir/breadth_first_search.h"
 #include "shader_recompiler/ir/ir_emitter.h"
 #include "shader_recompiler/ir/program.h"
+#include "video_core/amdgpu/pixel_format.h"
 #include "video_core/amdgpu/resource.h"
 
 namespace Shader::Optimization {
@@ -417,6 +418,66 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
     }
 }
 
+// Rewrites `inst` for images reporting NeedsNormalizationPatch(). Argument 2 of ImageWrite and
+// image atomics is scaled and converted float->int; the result of atomics and of every other
+// instruction is converted int->float and scaled by the reciprocal factor.
+// NOTE(review): composites are wrapped in IR::F32 below - confirm FPMul accepts such operands.
+void PatchNormalization(IR::Inst& inst, IR::IREmitter& ir, const AmdGpu::Image& image) {
+    if (!image.NeedsNormalizationPatch()) {
+        return;
+    }
+
+    const bool snorm = image.GetNumberFmt() == AmdGpu::NumberFormat::Snorm;
+    const bool atomic = IsImageAtomicInstruction(inst);
+    const bool writes = atomic || inst.GetOpcode() == IR::Opcode::ImageWrite;
+    const int component_count = AmdGpu::NumComponents(image.GetDataFmt());
+
+    // Captured by reference by the lambdas; read paths replace it with its reciprocal first.
+    IR::F32 scale = ir.Imm32(snorm ? 2147483647.0f : 4294967295.0f);
+    const auto broadcast_scale = [&]() -> IR::F32 {
+        if (component_count == 1) {
+            return scale;
+        } else if (component_count == 2) {
+            return IR::F32{ir.CompositeConstruct(scale, scale)};
+        } else if (component_count == 3) {
+            return IR::F32{ir.CompositeConstruct(scale, scale, scale)};
+        } else if (component_count == 4) {
+            return IR::F32{ir.CompositeConstruct(scale, scale, scale, scale)};
+        }
+        UNREACHABLE();
+    };
+    // Copies `inst`, converts and scales the copy's result, then redirects the users of `inst`.
+    const auto rescale_result = [&] {
+        IR::Value value(ir.CopyInst(inst));
+        if (snorm) {
+            value = ir.ConvertSToF(32, 32, value);
+        } else {
+            value = ir.ConvertUToF(32, 32, value);
+        }
+        value = ir.FPMul(IR::F32(value), broadcast_scale());
+        inst.ReplaceUsesWith(value);
+    };
+
+    if (!writes) {
+        scale = ir.FPRecip(scale);
+        rescale_result();
+        return;
+    }
+
+    IR::F32 payload{inst.Arg(2)};
+    payload = ir.FPMul(payload, broadcast_scale());
+    if (snorm) {
+        inst.SetArg(2, ir.ConvertFToS(32, payload));
+    } else {
+        inst.SetArg(2, ir.ConvertFToU(32, payload));
+    }
+    // Atomic instructions return the old value, so we need to patch the read.
+    if (atomic) {
+        scale = ir.FPRecip(scale);
+        rescale_result();
+    }
+}
+
 void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                  Descriptors& descriptors, const IR::Inst* producer,
                                  const u32 image_binding, const AmdGpu::Image& image) {
@@ -598,6 +659,8 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
         return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
     }();
     inst.ReplaceUsesWith(new_inst);
+
+    PatchNormalization(*new_inst.Inst(), ir, image);
 }
 
 void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
@@ -724,6 +787,8 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
                image.GetType() == AmdGpu::ImageType::Color2DMsaaArray) {
         inst.SetArg(4, arg);
     }
+
+    PatchNormalization(inst, ir, image);
 }
 
 void PatchDataRingInstruction(IR::Block& block, IR::Inst& inst, Info& info,

diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
@@ -26,8 +26,16 @@ struct TextureBufferSpecialization {
 };
 
 struct ImageSpecialization {
+    enum class NormalizationSign : u8 { // Sign of the normalization patch an image needs, if any.
+        None,     // Default: the image does not need the normalization patch.
+        Signed,   // Image needs the patch and its number format is Snorm.
+        Unsigned, // Image needs the patch and its number format is Unorm.
+    };
+
     AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
     bool is_integer = false;
+    NormalizationSign normalization = NormalizationSign::None;
+    u32 normalized_components = 0; // NumComponents of the data format; stays 0 when not patched.
 
     auto operator<=>(const ImageSpecialization&) const = default;
 };
@@ -73,12 +81,19 @@ struct StageSpecialization {
                      [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
                      });
-        ForEachSharp(binding, images, info->images,
-                     [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
-                         spec.type = sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray
-                                                              : sharp.GetType();
-                         spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
-                     });
+        ForEachSharp(
+            binding, images, info->images, [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
+                spec.type =
+                    sharp.IsPartialCubemap() ? AmdGpu::ImageType::Color2DArray : sharp.GetType();
+                spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
+
+                if (sharp.NeedsNormalizationPatch()) {
+                    spec.normalization = sharp.GetNumberFmt() == AmdGpu::NumberFormat::Snorm
+                                             ? ImageSpecialization::NormalizationSign::Signed
+                                             : ImageSpecialization::NormalizationSign::Unsigned;
+                    spec.normalized_components = AmdGpu::NumComponents(sharp.GetDataFmt());
+                }
+            });
         ForEachSharp(binding, fmasks, info->fmasks,
                      [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
                          spec.width = sharp.width;

diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h
@@ -291,6 +291,22 @@ struct Image {
         return static_cast<TilingMode>(tiling_index);
     }
 
+    // True only for a Unorm/Snorm number format combined with one of the data formats Format32,
+    // Format32_32, Format32_32_32 or Format32_32_32_32; false for everything else.
+    bool NeedsNormalizationPatch() const {
+        // Number formats other than Unorm and Snorm never need the patch.
+        if (GetNumberFmt() != AmdGpu::NumberFormat::Unorm &&
+            GetNumberFmt() != AmdGpu::NumberFormat::Snorm) {
+            return false;
+        }
+
+        const auto data_fmt = GetDataFmt();
+        return data_fmt == AmdGpu::DataFormat::Format32 ||
+               data_fmt == AmdGpu::DataFormat::Format32_32 ||
+               data_fmt == AmdGpu::DataFormat::Format32_32_32 ||
+               data_fmt == AmdGpu::DataFormat::Format32_32_32_32;
+    }
+
     bool IsTiled() const {
         return GetTilingMode() != TilingMode::Display_Linear;
     }

diff --git a/src/video_core/renderer_vulkan/liverpool_to_vk.cpp b/src/video_core/renderer_vulkan/liverpool_to_vk.cpp
@@ -635,6 +635,33 @@ std::span<const SurfaceFormatInfo> SurfaceFormats() {
                                 vk::Format::eBc7UnormBlock),
         CreateSurfaceFormatInfo(AmdGpu::DataFormat::FormatBc7, AmdGpu::NumberFormat::Srgb,
                                 vk::Format::eBc7SrgbBlock),
+
+        /*
+         * Shaders reading/writing these formats need to be patched to normalize/unnormalize the
+         * values. This is because Vulkan doesn't support these formats directly.
+         *
+         * see shader_recompiler/ir/passes/resource_tracking_pass.cpp
+         */
+        // 32
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32, AmdGpu::NumberFormat::Unorm,
+                                vk::Format::eR32Uint),
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32, AmdGpu::NumberFormat::Snorm,
+                                vk::Format::eR32Sint),
+        // 32_32
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32_32, AmdGpu::NumberFormat::Unorm,
+                                vk::Format::eR32G32Uint),
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32_32, AmdGpu::NumberFormat::Snorm,
+                                vk::Format::eR32G32Sint),
+        // 32_32_32
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32_32_32, AmdGpu::NumberFormat::Unorm,
+                                vk::Format::eR32G32B32Uint),
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32_32_32, AmdGpu::NumberFormat::Snorm,
+                                vk::Format::eR32G32B32Sint),
+        // 32_32_32_32
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32_32_32_32, AmdGpu::NumberFormat::Unorm,
+                                vk::Format::eR32G32B32A32Uint),
+        CreateSurfaceFormatInfo(AmdGpu::DataFormat::Format32_32_32_32, AmdGpu::NumberFormat::Snorm,
+                                vk::Format::eR32G32B32A32Sint),
     };
     return formats;
 }