From 20e0407235f3e98da7a9539f25cadf794eb9157f Mon Sep 17 00:00:00 2001
From: GPUCode <geoster3d@gmail.com>
Date: Tue, 26 Dec 2023 16:28:42 +0200
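Subject: [PATCH] video_core: Fix buffer_row_length computation for linear
 compressed textures

For linear images, UnswizzleImage now multiplies the pitch by the
format's default block width before shifting by bpp_log2 when it fills
in buffer_row_length.

DecompressBCn and DecompressBlocks now take the BufferImageCopy instead
of a bare extent. After each row of blocks the input offset advances by
buffer_row_length * block_size / block_width bytes, rather than by the
number of blocks that were decoded in that row.

Because the decoder now reads copy.buffer_row_length, ConvertImage
overwrites buffer_row_length and buffer_image_height with the mip size
only after the copy has been converted.
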
---
 src/video_core/texture_cache/decode_bc.cpp | 50 ++++++++++++----------
 src/video_core/texture_cache/decode_bc.h   |  2 +-
 src/video_core/texture_cache/util.cpp      | 16 +++----
 3 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp
index 3e26474a3e..a018c6df46 100644
--- a/src/video_core/texture_cache/decode_bc.cpp
+++ b/src/video_core/texture_cache/decode_bc.cpp
@@ -60,66 +60,72 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
 }
 
 template <auto decompress, PixelFormat pixel_format>
-void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
                       bool is_signed = false) {
     const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
-    const u32 block_width = std::min(extent.width, BLOCK_SIZE);
-    const u32 block_height = std::min(extent.height, BLOCK_SIZE);
-    const u32 pitch = extent.width * out_bpp;
+    const u32 block_size = BlockSize(pixel_format);
+    const u32 width = copy.image_extent.width;
+    const u32 height = copy.image_extent.height * copy.image_subresource.num_layers;
+    const u32 depth = copy.image_extent.depth;
+    const u32 block_width = std::min(width, BLOCK_SIZE);
+    const u32 block_height = std::min(height, BLOCK_SIZE);
+    const u32 pitch = width * out_bpp;
     size_t input_offset = 0;
     size_t output_offset = 0;
-    for (u32 slice = 0; slice < extent.depth; ++slice) {
-        for (u32 y = 0; y < extent.height; y += block_height) {
-            size_t row_offset = 0;
-            for (u32 x = 0; x < extent.width;
-                 x += block_width, row_offset += block_width * out_bpp) {
-                const u8* src = input.data() + input_offset;
-                u8* const dst = output.data() + output_offset + row_offset;
+    for (u32 slice = 0; slice < depth; ++slice) {
+        for (u32 y = 0; y < height; y += block_height) {
+            size_t src_offset = input_offset;
+            size_t dst_offset = output_offset;
+            for (u32 x = 0; x < width; x += block_width) {
+                const u8* src = input.data() + src_offset;
+                u8* const dst = output.data() + dst_offset;
                 if constexpr (IsSigned(pixel_format)) {
-                    decompress(src, dst, x, y, extent.width, extent.height, is_signed);
+                    decompress(src, dst, x, y, width, height, is_signed);
                 } else {
-                    decompress(src, dst, x, y, extent.width, extent.height);
+                    decompress(src, dst, x, y, width, height);
                 }
-                input_offset += BlockSize(pixel_format);
+                src_offset += block_size;
+                dst_offset += block_width * out_bpp;
             }
+            input_offset += copy.buffer_row_length * block_size / block_width;
             output_offset += block_height * pitch;
         }
     }
 }
 
-void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
                    VideoCore::Surface::PixelFormat pixel_format) {
     switch (pixel_format) {
     case PixelFormat::BC1_RGBA_UNORM:
     case PixelFormat::BC1_RGBA_SRGB:
-        DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent);
+        DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy);
         break;
     case PixelFormat::BC2_UNORM:
     case PixelFormat::BC2_SRGB:
-        DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent);
+        DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy);
         break;
     case PixelFormat::BC3_UNORM:
     case PixelFormat::BC3_SRGB:
-        DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent);
+        DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy);
         break;
     case PixelFormat::BC4_SNORM:
     case PixelFormat::BC4_UNORM:
         DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
-            input, output, extent, pixel_format == PixelFormat::BC4_SNORM);
+            input, output, copy, pixel_format == PixelFormat::BC4_SNORM);
         break;
     case PixelFormat::BC5_SNORM:
     case PixelFormat::BC5_UNORM:
         DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
-            input, output, extent, pixel_format == PixelFormat::BC5_SNORM);
+            input, output, copy, pixel_format == PixelFormat::BC5_SNORM);
         break;
     case PixelFormat::BC6H_SFLOAT:
     case PixelFormat::BC6H_UFLOAT:
         DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
-            input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT);
+            input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT);
         break;
     case PixelFormat::BC7_SRGB:
     case PixelFormat::BC7_UNORM:
-        DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent);
+        DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy);
         break;
     default:
         LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
diff --git a/src/video_core/texture_cache/decode_bc.h b/src/video_core/texture_cache/decode_bc.h
index 41d1ec0a3a..4e3b9b8ace 100644
--- a/src/video_core/texture_cache/decode_bc.h
+++ b/src/video_core/texture_cache/decode_bc.h
@@ -13,7 +13,7 @@ namespace VideoCommon {
 
 [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format);
 
-void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
                    VideoCore::Surface::PixelFormat pixel_format);
 
 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 15596c9250..fcf70068ec 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -837,6 +837,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
                                                                    std::span<u8> output) {
     const size_t guest_size_bytes = input.size_bytes();
     const u32 bpp_log2 = BytesPerBlockLog2(info.format);
+    const Extent2D tile_size = DefaultBlockSize(info.format);
     const Extent3D size = info.size;
 
     if (info.type == ImageType::Linear) {
@@ -847,7 +848,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
         return {{
             .buffer_offset = 0,
             .buffer_size = guest_size_bytes,
-            .buffer_row_length = info.pitch >> bpp_log2,
+            .buffer_row_length = info.pitch * tile_size.width >> bpp_log2,
             .buffer_image_height = size.height,
             .image_subresource =
                 {
@@ -862,7 +863,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
     const LevelInfo level_info = MakeLevelInfo(info);
     const s32 num_layers = info.resources.layers;
     const s32 num_levels = info.resources.levels;
-    const Extent2D tile_size = DefaultBlockSize(info.format);
     const std::array level_sizes = CalculateLevelSizes(level_info, num_levels);
     const Extent2D gob = GobSize(bpp_log2, info.block.height, info.tile_width_spacing);
     const u32 layer_size = CalculateLevelBytes(level_sizes, num_levels);
@@ -926,8 +926,6 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
 
         const auto input_offset = input.subspan(copy.buffer_offset);
         copy.buffer_offset = output_offset;
-        copy.buffer_row_length = mip_size.width;
-        copy.buffer_image_height = mip_size.height;
 
         const auto recompression_setting = Settings::values.astc_recompression.GetValue();
         const bool astc = IsPixelFormatASTC(info.format);
@@ -972,16 +970,14 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
                 bpp_div;
             output_offset += static_cast<u32>(copy.buffer_size);
         } else {
-            const Extent3D image_extent{
-                .width = copy.image_extent.width,
-                .height = copy.image_extent.height * copy.image_subresource.num_layers,
-                .depth = copy.image_extent.depth,
-            };
-            DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
+            DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format);
             output_offset += copy.image_extent.width * copy.image_extent.height *
                              copy.image_subresource.num_layers *
                              ConvertedBytesPerBlock(info.format);
         }
+
+        copy.buffer_row_length = mip_size.width;
+        copy.buffer_image_height = mip_size.height;
     }
 }
 
-- 
GitLab