diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9e90c587c48e0ae9d974d3345a48a41bd95bb8c6..9b2698fad23210c540f16564d3b07e8ba7c7020f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -544,7 +544,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
         it++;
     }
 
-    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
+    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
     for (const IntervalSet& intervals : committed_ranges) {
@@ -914,6 +914,11 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
 
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const bool is_written = ((channel_state->written_storage_buffers[stage] >> index) & 1) != 0;
+
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
             runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written);
             ++binding_index;
@@ -931,6 +936,11 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
+        const bool is_written = ((channel_state->written_texture_buffers[stage] >> index) & 1) != 0;
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -962,6 +972,8 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
+        MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+
         const u32 offset = buffer.Offset(binding.cpu_addr);
         host_bindings.buffers.push_back(&buffer);
         host_bindings.offsets.push_back(offset);
@@ -1011,6 +1023,11 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const bool is_written =
             ((channel_state->written_compute_storage_buffers >> index) & 1) != 0;
+
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         if constexpr (NEEDS_BIND_STORAGE_INDEX) {
             runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written);
             ++binding_index;
@@ -1028,6 +1045,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
         const u32 size = binding.size;
         SynchronizeBuffer(buffer, binding.cpu_addr, size);
 
+        const bool is_written =
+            ((channel_state->written_compute_texture_buffers >> index) & 1) != 0;
+        if (is_written) {
+            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, size);
+        }
+
         const u32 offset = buffer.Offset(binding.cpu_addr);
         const PixelFormat format = binding.format;
         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
@@ -1201,16 +1224,11 @@ void BufferCache<P>::UpdateUniformBuffers(size_t stage) {
 
 template <class P>
 void BufferCache<P>::UpdateStorageBuffers(size_t stage) {
-    const u32 written_mask = channel_state->written_storage_buffers[stage];
     ForEachEnabledBit(channel_state->enabled_storage_buffers[stage], [&](u32 index) {
         // Resolve buffer
         Binding& binding = channel_state->storage_buffers[stage][index];
         const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
         binding.buffer_id = buffer_id;
-        // Mark buffer as written if needed
-        if (((written_mask >> index) & 1) != 0) {
-            MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
@@ -1219,10 +1237,6 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
     ForEachEnabledBit(channel_state->enabled_texture_buffers[stage], [&](u32 index) {
         Binding& binding = channel_state->texture_buffers[stage][index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        // Mark buffer as written if needed
-        if (((channel_state->written_texture_buffers[stage] >> index) & 1) != 0) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
@@ -1252,7 +1266,6 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
         .size = size,
         .buffer_id = buffer_id,
     };
-    MarkWrittenBuffer(buffer_id, *cpu_addr, size);
 }
 
 template <class P>
@@ -1279,10 +1292,6 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
         // Resolve buffer
         Binding& binding = channel_state->compute_storage_buffers[index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        // Mark as written if needed
-        if (((channel_state->written_compute_storage_buffers >> index) & 1) != 0) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
@@ -1291,18 +1300,11 @@ void BufferCache<P>::UpdateComputeTextureBuffers() {
     ForEachEnabledBit(channel_state->enabled_compute_texture_buffers, [&](u32 index) {
         Binding& binding = channel_state->compute_texture_buffers[index];
         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
-        // Mark as written if needed
-        if (((channel_state->written_compute_texture_buffers >> index) & 1) != 0) {
-            MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
-        }
     });
 }
 
 template <class P>
 void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) {
-    if (memory_tracker.IsRegionCpuModified(cpu_addr, size)) {
-        SynchronizeBuffer(slot_buffers[buffer_id], cpu_addr, size);
-    }
     memory_tracker.MarkRegionAsGpuModified(cpu_addr, size);
 
     const IntervalType base_interval{cpu_addr, cpu_addr + size};