diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/macro/macro_jit_x64.h | 2 | ||||
-rw-r--r-- | src/video_core/memory_manager.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 173 | ||||
-rw-r--r-- | src/video_core/textures/texture.h | 2 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 16 | ||||
-rw-r--r-- | src/video_core/vulkan_common/vulkan_device.h | 10 |
7 files changed, 138 insertions, 68 deletions
diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h index 7f50ac2f8..d03d480b4 100644 --- a/src/video_core/macro/macro_jit_x64.h +++ b/src/video_core/macro/macro_jit_x64.h @@ -6,7 +6,7 @@ #include <array> #include <bitset> -#include <xbyak.h> +#include <xbyak/xbyak.h> #include "common/bit_field.h" #include "common/common_types.h" #include "common/x64/xbyak_abi.h" diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 882eff880..c60ed6453 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -463,6 +463,7 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( ++page_index; page_offset = 0; remaining_size -= num_bytes; + old_page_addr = page_addr; } split(); return result; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a37ca1fdf..f316c4f92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -281,7 +281,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw .supported_spirv = device.IsKhrSpirv1_4Supported() ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = true, - .support_int8 = true, + .support_int8 = device.IsInt8Supported(), .support_int16 = device.IsShaderInt16Supported(), .support_int64 = device.IsShaderInt64Supported(), .support_vertex_instance_id = false, diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c32ae956a..c010b9353 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -84,56 +84,31 @@ template <bool TO_LINEAR> void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 stride_alignment) { switch (bytes_per_pixel) { - case 1: - return SwizzleImpl<TO_LINEAR, 1>(output, input, width, height, depth, block_height, +#define BPP_CASE(x) \ + case x: \ + return SwizzleImpl<TO_LINEAR, x>(output, input, width, height, depth, block_height, \ block_depth, stride_alignment); - case 2: - return SwizzleImpl<TO_LINEAR, 2>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 3: - return SwizzleImpl<TO_LINEAR, 3>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 4: - return SwizzleImpl<TO_LINEAR, 4>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 6: - return SwizzleImpl<TO_LINEAR, 6>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 8: - return SwizzleImpl<TO_LINEAR, 8>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 12: - return SwizzleImpl<TO_LINEAR, 12>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); - case 16: - return SwizzleImpl<TO_LINEAR, 16>(output, input, width, height, depth, block_height, - block_depth, stride_alignment); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE default: UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); } } -} // Anonymous namespace - -void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, - u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, - u32 stride_alignment) { - Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, - stride_alignment); -} - -void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, - u32 height, u32 depth, u32 block_height, u32 block_depth, - u32 stride_alignment) { - Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, - stride_alignment); -} +template <u32 BYTES_PER_PIXEL> void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, - u32 block_height_bit, u32 offset_x, u32 offset_y) { + u8* swizzled_data, const u8* unswizzled_data, u32 block_height_bit, + u32 offset_x, u32 offset_y) { const u32 block_height = 1U << block_height_bit; const u32 image_width_in_gobs = - (swizzled_width * bytes_per_pixel + (GOB_SIZE_X - 1)) / GOB_SIZE_X; + (swizzled_width * BYTES_PER_PIXEL + (GOB_SIZE_X - 1)) / GOB_SIZE_X; for (u32 line = 0; line < subrect_height; ++line) { const u32 dst_y = line + offset_y; const u32 gob_address_y = @@ -143,20 +118,21 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 for (u32 x = 0; x < subrect_width; ++x) { const u32 dst_x = x + offset_x; const u32 gob_address = - gob_address_y + (dst_x * bytes_per_pixel / GOB_SIZE_X) * GOB_SIZE * block_height; - const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % GOB_SIZE_X]; - const u32 unswizzled_offset = line * source_pitch + x * bytes_per_pixel; + gob_address_y + (dst_x * BYTES_PER_PIXEL / GOB_SIZE_X) * GOB_SIZE * block_height; + const u32 swizzled_offset = gob_address + table[(dst_x * BYTES_PER_PIXEL) % GOB_SIZE_X]; + const u32 unswizzled_offset = line * source_pitch + x * BYTES_PER_PIXEL; const u8* const source_line = unswizzled_data + unswizzled_offset; u8* const dest_addr = swizzled_data + swizzled_offset; - std::memcpy(dest_addr, source_line, bytes_per_pixel); + std::memcpy(dest_addr, source_line, BYTES_PER_PIXEL); } } } -void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, - u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { - const u32 stride = width * bytes_per_pixel; +template <u32 BYTES_PER_PIXEL> +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 block_height, + u32 origin_x, u32 origin_y, u8* output, const u8* input) { + const u32 stride = width * BYTES_PER_PIXEL; const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); @@ -171,24 +147,25 @@ void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, const u32 src_offset_y = (block_y >> block_height) * block_size + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); for (u32 column = 0; column < line_length_in; ++column) { - const u32 src_x = (column + origin_x) * bytes_per_pixel; + const u32 src_x = (column + origin_x) * BYTES_PER_PIXEL; const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; - const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel; + const u32 unswizzled_offset = line * pitch + column * BYTES_PER_PIXEL; - std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel); + std::memcpy(output + unswizzled_offset, input + swizzled_offset, BYTES_PER_PIXEL); } } } +template <u32 BYTES_PER_PIXEL> void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, - u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, - u32 origin_y, u8* output, const u8* input) { + u32 block_height, u32 block_depth, u32 origin_x, u32 origin_y, u8* output, + const u8* input) { UNIMPLEMENTED_IF(origin_x > 0); UNIMPLEMENTED_IF(origin_y > 0); - const u32 stride = width * bytes_per_pixel; + const u32 stride = width * BYTES_PER_PIXEL; const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); @@ -203,11 +180,93 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt for (u32 x = 0; x < line_length_in; ++x) { const u32 dst_offset = ((x / GOB_SIZE_X) << x_shift) + dst_offset_y + table[x % GOB_SIZE_X]; - const u32 src_offset = x * bytes_per_pixel + line * pitch; - std::memcpy(output + dst_offset, input + src_offset, bytes_per_pixel); + const u32 src_offset = x * BYTES_PER_PIXEL + line * pitch; + std::memcpy(output + dst_offset, input + src_offset, BYTES_PER_PIXEL); } } } +} // Anonymous namespace + +void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, + u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); +} + +void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, + u32 height, u32 depth, u32 block_height, u32 block_depth, + u32 stride_alignment) { + Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, + stride_alignment); +} + +void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, + u32 bytes_per_pixel, u8* swizzled_data, const u8* unswizzled_data, + u32 block_height_bit, u32 offset_x, u32 offset_y) { + switch (bytes_per_pixel) { +#define BPP_CASE(x) \ + case x: \ + return SwizzleSubrect<x>(subrect_width, subrect_height, source_pitch, swizzled_width, \ + swizzled_data, unswizzled_data, block_height_bit, offset_x, \ + offset_y); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} + +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, + u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { + switch (bytes_per_pixel) { +#define BPP_CASE(x) \ + case x: \ + return UnswizzleSubrect<x>(line_length_in, line_count, pitch, width, block_height, \ + origin_x, origin_y, output, input); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} + +void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 height, + u32 bytes_per_pixel, u32 block_height, u32 block_depth, u32 origin_x, + u32 origin_y, u8* output, const u8* input) { + switch (bytes_per_pixel) { +#define BPP_CASE(x) \ + case x: \ + return SwizzleSliceToVoxel<x>(line_length_in, line_count, pitch, width, height, \ + block_height, block_depth, origin_x, origin_y, output, \ + input); + BPP_CASE(1) + BPP_CASE(2) + BPP_CASE(3) + BPP_CASE(4) + BPP_CASE(6) + BPP_CASE(8) + BPP_CASE(12) + BPP_CASE(16) +#undef BPP_CASE + default: + UNREACHABLE_MSG("Invalid bytes_per_pixel={}", bytes_per_pixel); + } +} void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, const u32 block_height_bit, const std::size_t copy_size, const u8* source_data, @@ -228,7 +287,7 @@ void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 u8* dest_addr = swizzle_data + swizzled_offset; count++; - std::memcpy(dest_addr, source_line, 1); + *dest_addr = *source_line; } } } diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 1a9399455..7994cb859 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -159,7 +159,7 @@ static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); return {raw, raw}; } else { const Tegra::Texture::TextureHandle handle{raw}; - return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; + return {handle.tic_id, handle.tsc_id}; } } diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 8e56a89e1..86ca4be54 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -368,18 +368,21 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR }; SetNext(next, demote); - VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8; - if (is_float16_supported) { - float16_int8 = { + if (is_int8_supported || is_float16_supported) { + VkPhysicalDeviceFloat16Int8FeaturesKHR float16_int8{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR, .pNext = nullptr, - .shaderFloat16 = true, - .shaderInt8 = false, + .shaderFloat16 = is_float16_supported, + .shaderInt8 = is_int8_supported, }; SetNext(next, float16_int8); - } else { + } + if (!is_float16_supported) { LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); } + if (!is_int8_supported) { + LOG_INFO(Render_Vulkan, "Device doesn't support int8 natively"); + } if (!nv_viewport_swizzle) { LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles"); @@ -909,6 +912,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { physical.GetFeatures2KHR(features); is_float16_supported = float16_int8_features.shaderFloat16; + is_int8_supported = float16_int8_features.shaderInt8; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } if (has_ext_subgroup_size_control) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index c19f40746..234d74129 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -139,11 +139,16 @@ public: return is_optimal_astc_supported; } - /// Returns true if the device supports float16 natively + /// Returns true if the device supports float16 natively. bool IsFloat16Supported() const { return is_float16_supported; } + /// Returns true if the device supports int8 natively. + bool IsInt8Supported() const { + return is_int8_supported; + } + /// Returns true if the device warp size can potentially be bigger than guest's warp size. bool IsWarpSizePotentiallyBiggerThanGuest() const { return is_warp_potentially_bigger; @@ -367,7 +372,8 @@ private: u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 max_push_descriptors{}; ///< Maximum number of push descriptors bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool is_float16_supported{}; ///< Support for float16 arithmetic. + bool is_int8_supported{}; ///< Support for int8 arithmetic. bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest. bool is_formatless_image_load_supported{}; ///< Support for shader image read without format. bool is_depth_bounds_supported{}; ///< Support for depth bounds. |