diff options
Diffstat (limited to 'src/video_core')
21 files changed, 213 insertions, 177 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 26939be3f..6ea7cc6a5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -542,7 +542,7 @@ public: BitField<12, 1, InvMemoryLayout> type; } memory_layout; union { - BitField<0, 16, u32> array_mode; + BitField<0, 16, u32> layers; BitField<16, 1, u32> volume; }; u32 layer_stride; @@ -800,8 +800,12 @@ public: u32 zeta_width; u32 zeta_height; + union { + BitField<0, 16, u32> zeta_layers; + BitField<16, 1, u32> zeta_volume; + }; - INSERT_UNION_PADDING_WORDS(0x27); + INSERT_UNION_PADDING_WORDS(0x26); u32 depth_test_enable; @@ -1507,6 +1511,7 @@ ASSERT_REG_POSITION(vertex_attrib_format, 0x458); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); +ASSERT_REG_POSITION(zeta_layers, 0x48c); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7d7137109..e8f763ce9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -140,71 +140,6 @@ void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } -u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { - ASSERT(format != RenderTargetFormat::NONE); - - switch (format) { - case RenderTargetFormat::RGBA32_FLOAT: - case RenderTargetFormat::RGBA32_UINT: - return 16; - case RenderTargetFormat::RGBA16_UINT: - case RenderTargetFormat::RGBA16_UNORM: - case RenderTargetFormat::RGBA16_FLOAT: - case RenderTargetFormat::RGBX16_FLOAT: - case RenderTargetFormat::RG32_FLOAT: - case RenderTargetFormat::RG32_UINT: - return 8; - case RenderTargetFormat::RGBA8_UNORM: - case RenderTargetFormat::RGBA8_SNORM: - case RenderTargetFormat::RGBA8_SRGB: - case RenderTargetFormat::RGBA8_UINT: - case RenderTargetFormat::RGB10_A2_UNORM: - case RenderTargetFormat::BGRA8_UNORM: - case RenderTargetFormat::BGRA8_SRGB: - case RenderTargetFormat::RG16_UNORM: - case RenderTargetFormat::RG16_SNORM: - case RenderTargetFormat::RG16_UINT: - case RenderTargetFormat::RG16_SINT: - case RenderTargetFormat::RG16_FLOAT: - case RenderTargetFormat::R32_FLOAT: - case RenderTargetFormat::R11G11B10_FLOAT: - case RenderTargetFormat::R32_UINT: - return 4; - case RenderTargetFormat::R16_UNORM: - case RenderTargetFormat::R16_SNORM: - case RenderTargetFormat::R16_UINT: - case RenderTargetFormat::R16_SINT: - case RenderTargetFormat::R16_FLOAT: - case RenderTargetFormat::RG8_UNORM: - case RenderTargetFormat::RG8_SNORM: - return 2; - case RenderTargetFormat::R8_UNORM: - case RenderTargetFormat::R8_UINT: - return 1; - default: - UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); - return 1; - } -} - -u32 DepthFormatBytesPerPixel(DepthFormat format) { - switch (format) { - case DepthFormat::Z32_S8_X24_FLOAT: - return 8; - case DepthFormat::Z32_FLOAT: - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_X8_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z24_C8_UNORM: - return 4; - case DepthFormat::Z16_UNORM: - return 2; - default: - UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format)); - return 1; - } -} - // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. // So the values you see in docs might be multiplied by 4. diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 07727210c..ba8c9d665 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -57,6 +57,7 @@ enum class RenderTargetFormat : u32 { RG16_UINT = 0xDD, RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, + R32_SINT = 0xE3, R32_UINT = 0xE4, R32_FLOAT = 0xE5, B5G6R5_UNORM = 0xE8, @@ -82,12 +83,6 @@ enum class DepthFormat : u32 { Z32_S8_X24_FLOAT = 0x19, }; -/// Returns the number of bytes per pixel of each rendertarget format. -u32 RenderTargetBytesPerPixel(RenderTargetFormat format); - -/// Returns the number of bytes per pixel of each depth format. -u32 DepthFormatBytesPerPixel(DepthFormat format); - struct CommandListHeader; class DebugContext; diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index 2f2fe6859..f2c83266e 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -85,6 +85,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::RG32UI>, MortonCopy<true, PixelFormat::RGBX16F>, MortonCopy<true, PixelFormat::R32UI>, + MortonCopy<true, PixelFormat::R32I>, MortonCopy<true, PixelFormat::ASTC_2D_8X8>, MortonCopy<true, PixelFormat::ASTC_2D_8X5>, MortonCopy<true, PixelFormat::ASTC_2D_5X4>, @@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::RG32UI>, MortonCopy<false, PixelFormat::RGBX16F>, MortonCopy<false, PixelFormat::R32UI>, + MortonCopy<false, PixelFormat::R32I>, nullptr, nullptr, nullptr, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index d4b81cd87..cf934b0d8 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI + {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 @@ -260,6 +261,13 @@ CachedSurface::~CachedSurface() = default; void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { MICROPROFILE_SCOPE(OpenGL_Texture_Download); + if (params.IsBuffer()) { + glGetNamedBufferSubData(texture_buffer.handle, 0, + static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), + staging_buffer.data()); + return; + } + SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { @@ -398,24 +406,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { - ASSERT(params.num_layers == 1 && params.num_levels == 1); + ASSERT(params.num_levels == 1); - const auto& owner_params = surface.GetSurfaceParams(); + const GLuint texture = surface.GetTexture(); + if (params.num_layers > 1) { + // Layered framebuffer attachments + UNIMPLEMENTED_IF(params.base_layer != 0); + + switch (params.target) { + case SurfaceTarget::Texture2DArray: + glFramebufferTexture(target, attachment, texture, params.base_level); + break; + default: + UNIMPLEMENTED(); + } + return; + } - switch (owner_params.target) { + const GLenum view_target = surface.GetTarget(); + switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), - params.base_level); + glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), - params.base_level); + glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, + glFramebufferTextureLayer(target, attachment, texture, params.base_level, params.base_layer); break; default: diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 331808113..ef66dd141 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -159,12 +159,13 @@ struct FormatTuple { {vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI {vk::Format::eUndefined, {}}, // RGBX16F {vk::Format::eR32Uint, Attachable | Storage}, // R32UI + {vk::Format::eR32Sint, Attachable | Storage}, // R32I {vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8 {vk::Format::eUndefined, {}}, // ASTC_2D_8X5 {vk::Format::eUndefined, {}}, // ASTC_2D_5X4 {vk::Format::eUndefined, {}}, // BGRA8_SRGB {vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB - {vk::Format::eUndefined, {}}, // DXT23_SRGB + {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB {vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB {vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB {vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U @@ -363,6 +364,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return vk::Format::eR8G8B8A8Uint; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return vk::Format::eR32G32B32A32Uint; default: break; } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 588a6835f..886bde3b9 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -107,6 +107,8 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan features.occlusionQueryPrecise = true; features.fragmentStoresAndAtomics = true; features.shaderImageGatherExtended = true; + features.shaderStorageImageReadWithoutFormat = + is_shader_storage_img_read_without_format_supported; features.shaderStorageImageWriteWithoutFormat = true; features.textureCompressionASTC_LDR = is_optimal_astc_supported; @@ -465,6 +467,8 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { const auto supported_features{physical.getFeatures(dldi)}; + is_shader_storage_img_read_without_format_supported = + supported_features.shaderStorageImageReadWithoutFormat; is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } @@ -519,6 +523,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eB10G11R11UfloatPack32, vk::Format::eR32Sfloat, vk::Format::eR32Uint, + vk::Format::eR32Sint, vk::Format::eR16Sfloat, vk::Format::eR16G16B16A16Sfloat, vk::Format::eB8G8R8A8Unorm, @@ -538,6 +543,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eBc6HUfloatBlock, vk::Format::eBc6HSfloatBlock, vk::Format::eBc1RgbaSrgbBlock, + vk::Format::eBc2SrgbBlock, vk::Format::eBc3SrgbBlock, vk::Format::eBc7SrgbBlock, vk::Format::eAstc4x4SrgbBlock, diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 72603f9f6..2c27ad730 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -122,6 +122,11 @@ public: return properties.limits.maxPushConstantsSize; } + /// Returns true if Shader storage Image Read Without Format supported. + bool IsShaderStorageImageReadWithoutFormatSupported() const { + return is_shader_storage_img_read_without_format_supported; + } + /// Returns true if ASTC is natively supported. bool IsOptimalAstcSupported() const { return is_optimal_astc_supported; @@ -227,6 +232,8 @@ private: bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted. bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer. bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints. + bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage + ///< image read without format // Telemetry parameters std::string vendor_name; ///< Device's driver name. diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 31c078f6a..3bf86da87 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -611,33 +611,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( vk::RenderPass renderpass) { FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), - std::numeric_limits<u32>::max()}; + std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; - const auto MarkAsModifiedAndPush = [&](const View& view) { - if (view == nullptr) { + const auto try_push = [&](const View& view) { + if (!view) { return false; } key.views.push_back(view->GetHandle()); key.width = std::min(key.width, view->GetWidth()); key.height = std::min(key.height, view->GetHeight()); + key.layers = std::min(key.layers, view->GetNumLayers()); return true; }; for (std::size_t index = 0; index < std::size(color_attachments); ++index) { - if (MarkAsModifiedAndPush(color_attachments[index])) { + if (try_push(color_attachments[index])) { texture_cache.MarkColorBufferInUse(index); } } - if (MarkAsModifiedAndPush(zeta_attachment)) { + if (try_push(zeta_attachment)) { texture_cache.MarkDepthBufferInUse(); } const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, - static_cast<u32>(key.views.size()), - key.views.data(), key.width, key.height, 1); + const vk::FramebufferCreateInfo framebuffer_ci( + {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width, + key.height, key.layers); const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 138903d60..4dc8af6e8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -56,6 +56,7 @@ struct FramebufferCacheKey { vk::RenderPass renderpass{}; u32 width = 0; u32 height = 0; + u32 layers = 0; ImageViewsPack views; std::size_t Hash() const noexcept { @@ -66,12 +67,17 @@ struct FramebufferCacheKey { } boost::hash_combine(hash, width); boost::hash_combine(hash, height); + boost::hash_combine(hash, layers); return hash; } bool operator==(const FramebufferCacheKey& rhs) const noexcept { - return std::tie(renderpass, views, width, height) == - std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); + return std::tie(renderpass, views, width, height, layers) == + std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers); + } + + bool operator!=(const FramebufferCacheKey& rhs) const noexcept { + return !operator==(rhs); } }; diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 0a8ec8398..204b7c39c 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -23,7 +23,14 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4> } else if (color == std::array<float, 4>{1, 1, 1, 1}) { return vk::BorderColor::eFloatOpaqueWhite; } else { - return {}; + if (color[0] + color[1] + color[2] > 1.35f) { + // If color elements are brighter than roughly 0.5 average, use white border + return vk::BorderColor::eFloatOpaqueWhite; + } + if (color[3] > 0.5f) { + return vk::BorderColor::eFloatOpaqueBlack; + } + return vk::BorderColor::eFloatTransparentBlack; } } @@ -37,8 +44,6 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const auto border_color{tsc.GetBorderColor()}; const auto vk_border_color{TryConvertBorderColor(border_color)}; - UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}", - border_color[0], border_color[1], border_color[2], border_color[3]); constexpr bool unnormalized_coords{false}; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index f64f5da28..2da622d15 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -86,6 +86,7 @@ struct AttributeType { struct VertexIndices { std::optional<u32> position; + std::optional<u32> layer; std::optional<u32> viewport; std::optional<u32> point_size; std::optional<u32> clip_distances; @@ -284,14 +285,20 @@ public: AddExtension("SPV_KHR_variable_pointers"); AddExtension("SPV_KHR_shader_draw_parameters"); - if (ir.UsesViewportIndex()) { - AddCapability(spv::Capability::MultiViewport); - if (device.IsExtShaderViewportIndexLayerSupported()) { + if (ir.UsesLayer() || ir.UsesViewportIndex()) { + if (ir.UsesViewportIndex()) { + AddCapability(spv::Capability::MultiViewport); + } + if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) { AddExtension("SPV_EXT_shader_viewport_index_layer"); AddCapability(spv::Capability::ShaderViewportIndexLayerEXT); } } + if (device.IsShaderStorageImageReadWithoutFormatSupported()) { + AddCapability(spv::Capability::StorageImageReadWithoutFormat); + } + if (device.IsFloat16Supported()) { AddCapability(spv::Capability::Float16); } @@ -924,13 +931,22 @@ private: VertexIndices indices; indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position"); + if (ir.UsesLayer()) { + if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { + indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer"); + } else { + LOG_ERROR( + Render_Vulkan, + "Shader requires Layer but it's not supported on this stage with this device."); + } + } + if (ir.UsesViewportIndex()) { if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) { indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index"); } else { - LOG_ERROR(Render_Vulkan, - "Shader requires ViewportIndex but it's not supported on this " - "stage with this device."); + LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on " + "this stage with this device."); } } @@ -1292,6 +1308,13 @@ private: } case Attribute::Index::LayerViewportPointSize: switch (element) { + case 1: { + if (!out_indices.layer) { + return {}; + } + const u32 index = out_indices.layer.value(); + return {AccessElement(t_out_int, out_vertex, index), Type::Int}; + } case 2: { if (!out_indices.viewport) { return {}; @@ -1362,6 +1385,11 @@ private: UNIMPLEMENTED(); } + if (!target.id) { + // On failure we return a nullptr target.id, skip these stores. + return {}; + } + OpStore(target.id, As(Visit(src), target.type)); return {}; } @@ -1755,8 +1783,16 @@ private: } Expression ImageLoad(Operation operation) { - UNIMPLEMENTED(); - return {}; + if (!device.IsShaderStorageImageReadWithoutFormatSupported()) { + return {v_float_zero, Type::Float}; + } + + const auto& meta{std::get<MetaImage>(operation.GetMeta())}; + + const Id coords = GetCoordinates(operation, Type::Int); + const Id texel = OpImageRead(t_uint4, GetImage(operation), coords); + + return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint}; } Expression ImageStore(Operation operation) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d3edbe80c..22e3d34de 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -151,6 +151,10 @@ public: return params.GetMipHeight(base_level); } + u32 GetNumLayers() const { + return num_layers; + } + bool IsBufferView() const { return buffer_view; } diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 542636430..bee7d8cad 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -527,7 +527,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, const bool is_bindless = bindless_reg.has_value(); UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow); - ASSERT_MSG(texture_type != TextureType::Texture3D || is_array || is_shadow, + ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, "Illegal texture type"); const SamplerInfo info{texture_type, is_array, is_shadow, false}; diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1655ccf16..9707c353d 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -155,6 +155,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_SINT: + return PixelFormat::R32I; case Tegra::RenderTargetFormat::R32_UINT: return PixelFormat::R32UI; case Tegra::RenderTargetFormat::RG32_UINT: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0d17a93ed..d88109e5a 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -59,47 +59,48 @@ enum class PixelFormat { RG32UI = 41, RGBX16F = 42, R32UI = 43, - ASTC_2D_8X8 = 44, - ASTC_2D_8X5 = 45, - ASTC_2D_5X4 = 46, - BGRA8_SRGB = 47, - DXT1_SRGB = 48, - DXT23_SRGB = 49, - DXT45_SRGB = 50, - BC7U_SRGB = 51, - R4G4B4A4U = 52, - ASTC_2D_4X4_SRGB = 53, - ASTC_2D_8X8_SRGB = 54, - ASTC_2D_8X5_SRGB = 55, - ASTC_2D_5X4_SRGB = 56, - ASTC_2D_5X5 = 57, - ASTC_2D_5X5_SRGB = 58, - ASTC_2D_10X8 = 59, - ASTC_2D_10X8_SRGB = 60, - ASTC_2D_6X6 = 61, - ASTC_2D_6X6_SRGB = 62, - ASTC_2D_10X10 = 63, - ASTC_2D_10X10_SRGB = 64, - ASTC_2D_12X12 = 65, - ASTC_2D_12X12_SRGB = 66, - ASTC_2D_8X6 = 67, - ASTC_2D_8X6_SRGB = 68, - ASTC_2D_6X5 = 69, - ASTC_2D_6X5_SRGB = 70, - E5B9G9R9F = 71, + R32I = 44, + ASTC_2D_8X8 = 45, + ASTC_2D_8X5 = 46, + ASTC_2D_5X4 = 47, + BGRA8_SRGB = 48, + DXT1_SRGB = 49, + DXT23_SRGB = 50, + DXT45_SRGB = 51, + BC7U_SRGB = 52, + R4G4B4A4U = 53, + ASTC_2D_4X4_SRGB = 54, + ASTC_2D_8X8_SRGB = 55, + ASTC_2D_8X5_SRGB = 56, + ASTC_2D_5X4_SRGB = 57, + ASTC_2D_5X5 = 58, + ASTC_2D_5X5_SRGB = 59, + ASTC_2D_10X8 = 60, + ASTC_2D_10X8_SRGB = 61, + ASTC_2D_6X6 = 62, + ASTC_2D_6X6_SRGB = 63, + ASTC_2D_10X10 = 64, + ASTC_2D_10X10_SRGB = 65, + ASTC_2D_12X12 = 66, + ASTC_2D_12X12_SRGB = 67, + ASTC_2D_8X6 = 68, + ASTC_2D_8X6_SRGB = 69, + ASTC_2D_6X5 = 70, + ASTC_2D_6X5_SRGB = 71, + E5B9G9R9F = 72, MaxColorFormat, // Depth formats - Z32F = 72, - Z16 = 73, + Z32F = 73, + Z16 = 74, MaxDepthFormat, // DepthStencil formats - Z24S8 = 74, - S8Z24 = 75, - Z32FS8 = 76, + Z24S8 = 75, + S8Z24 = 76, + Z32FS8 = 77, MaxDepthStencilFormat, @@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 0, // RG32UI 0, // RGBX16F 0, // R32UI + 0, // R32I 2, // ASTC_2D_8X8 2, // ASTC_2D_8X5 2, // ASTC_2D_5X4 @@ -267,6 +269,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 1, // RG32UI 1, // RGBX16F 1, // R32UI + 1, // R32I 8, // ASTC_2D_8X8 8, // ASTC_2D_8X5 5, // ASTC_2D_5X4 @@ -355,6 +358,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 1, // RG32UI 1, // RGBX16F 1, // R32UI + 1, // R32I 8, // ASTC_2D_8X8 5, // ASTC_2D_8X5 4, // ASTC_2D_5X4 @@ -443,6 +447,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 64, // RG32UI 64, // RGBX16F 32, // R32UI + 32, // R32I 128, // ASTC_2D_8X8 128, // ASTC_2D_8X5 128, // ASTC_2D_5X4 @@ -546,6 +551,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::None, // RG32UI SurfaceCompression::None, // RGBX16F SurfaceCompression::None, // R32UI + SurfaceCompression::None, // R32I SurfaceCompression::Converted, // ASTC_2D_8X8 SurfaceCompression::Converted, // ASTC_2D_8X5 SurfaceCompression::Converted, // ASTC_2D_5X4 diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 81fb9f633..cc3ad8417 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 74> DefinitionTable = {{ +constexpr std::array<Table, 75> DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -89,6 +89,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F}, {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI}, + {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I}, {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F}, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 84469b7ba..002df414f 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -277,6 +277,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } + } else if (params.IsBuffer()) { + // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest + // memory. + std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size); } else { ASSERT(params.target == SurfaceTarget::Texture2D); ASSERT(params.num_levels == 1); diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 38b3a4ba8..f00839313 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) { switch (params.pixel_format) { case PixelFormat::R16U: - case PixelFormat::R16F: { + case PixelFormat::R16F: params.pixel_format = PixelFormat::Z16; break; - } - case PixelFormat::R32F: { + case PixelFormat::R32F: params.pixel_format = PixelFormat::Z32F; break; - } - default: { + default: UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}", static_cast<u32>(params.pixel_format)); } - } params.type = GetFormatType(params.pixel_format); } params.type = GetFormatType(params.pixel_format); @@ -168,27 +165,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl return params; } -SurfaceParams SurfaceParams::CreateForDepthBuffer( - Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { +SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { + const auto& regs = system.GPU().Maxwell3D().regs; + regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; SurfaceParams params; - params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.is_tiled = regs.zeta.memory_layout.type == + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; params.srgb_conversion = false; - params.block_width = std::min(block_width, 5U); - params.block_height = std::min(block_height, 5U); - params.block_depth = std::min(block_depth, 5U); + params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U); + params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U); + params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromDepthFormat(format); + params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); params.type = GetFormatType(params.pixel_format); - params.width = zeta_width; - params.height = zeta_height; - params.target = SurfaceTarget::Texture2D; - params.depth = 1; + params.width = regs.zeta_width; + params.height = regs.zeta_height; params.pitch = 0; params.num_levels = 1; params.emulated_levels = 1; - params.is_layered = false; + + const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0; + params.is_layered = is_layered; + params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; + params.depth = is_layered ? regs.zeta_layers.Value() : 1U; return params; } @@ -214,11 +213,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz params.width = params.pitch / bpp; } params.height = config.height; - params.depth = 1; - params.target = SurfaceTarget::Texture2D; params.num_levels = 1; params.emulated_levels = 1; - params.is_layered = false; + + const bool is_layered = config.layers > 1 && params.block_depth == 0; + params.is_layered = is_layered; + params.depth = is_layered ? config.layers.Value() : 1; + params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; return params; } diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 9256fd6d9..995cc3818 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -35,10 +35,7 @@ public: const VideoCommon::Shader::Image& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer( - Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); + static SurfaceParams CreateForDepthBuffer(Core::System& system); /// Creates SurfaceCachedParams from a framebuffer configuration. static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f4c015635..c70e4aec2 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -160,10 +160,7 @@ public: SetEmptyDepthBuffer(); return {}; } - const auto depth_params{SurfaceParams::CreateForDepthBuffer( - system, regs.zeta_width, regs.zeta_height, regs.zeta.format, - regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, - regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; + const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); @@ -721,7 +718,6 @@ private: std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { - // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. @@ -733,14 +729,18 @@ private: return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, topological_result); } + const auto struct_result = current_surface->MatchesStructure(params); - if (struct_result != MatchStructureResult::None && - (params.target != SurfaceTarget::Texture3D || - current_surface->MatchTarget(params.target))) { - if (struct_result == MatchStructureResult::FullMatch) { - return ManageStructuralMatch(current_surface, params, is_render); - } else { - return RebuildSurface(current_surface, params, is_render); + if (struct_result != MatchStructureResult::None) { + const auto& old_params = current_surface->GetSurfaceParams(); + const bool not_3d = params.target != SurfaceTarget::Texture3D && + old_params.target != SurfaceTarget::Texture3D; + if (not_3d || current_surface->MatchTarget(params.target)) { + if (struct_result == MatchStructureResult::FullMatch) { + return ManageStructuralMatch(current_surface, params, is_render); + } else { + return RebuildSurface(current_surface, params, is_render); + } } } } |