diff options
Diffstat (limited to 'src/video_core')
20 files changed, 1274 insertions, 654 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 57f31cd58..0c3038c52 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -80,6 +80,7 @@ add_library(video_core STATIC shader/decode/hfma2.cpp shader/decode/conversion.cpp shader/decode/memory.cpp + shader/decode/texture.cpp shader/decode/float_set_predicate.cpp shader/decode/integer_set_predicate.cpp shader/decode/half_set_predicate.cpp @@ -112,6 +113,8 @@ add_library(video_core STATIC if (ENABLE_VULKAN) target_sources(video_core PRIVATE renderer_vulkan/declarations.h + renderer_vulkan/maxwell_to_vk.cpp + renderer_vulkan/maxwell_to_vk.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h renderer_vulkan/vk_device.cpp diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 669541b4b..bff1a37ff 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -39,7 +39,7 @@ bool DmaPusher::Step() { } const CommandList& command_list{dma_pushbuffer.front()}; - const CommandListHeader& command_list_header{command_list[dma_pushbuffer_subindex++]}; + const CommandListHeader command_list_header{command_list[dma_pushbuffer_subindex++]}; GPUVAddr dma_get = command_list_header.addr; GPUVAddr dma_put = dma_get + command_list_header.size * sizeof(u32); bool non_main = command_list_header.is_non_main; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7eb15b6a..7f613370b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -324,11 +324,11 @@ enum class TextureQueryType : u64 { enum class TextureProcessMode : u64 { None = 0, - LZ = 1, // Unknown, appears to be the same as none. + LZ = 1, // Load LOD of zero. LB = 2, // Load Bias. - LL = 3, // Load LOD (LevelOfDetail) - LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB - LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL + LL = 3, // Load LOD. + LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB. + LLA = 7 // Load LOD. The A is unknown, does not appear to differ with LL. }; enum class TextureMiscMode : u64 { @@ -1445,6 +1445,7 @@ public: Flow, Synch, Memory, + Texture, FloatSet, FloatSetPredicate, IntegerSet, @@ -1575,14 +1576,14 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), - INST("110000----111---", Id::TEX, Type::Memory, "TEX"), - INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), - INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), - INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), - INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), - INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), - INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), - INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), + INST("110000----111---", Id::TEX, Type::Texture, "TEX"), + INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"), + INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"), + INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"), + INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"), + INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"), + INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), + INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 14a421cc1..56a203275 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -123,7 +123,7 @@ class GPU { public: explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); - ~GPU(); + virtual ~GPU(); struct MethodCall { u32 method{}; diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 58046f3e9..e6a807aba 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -21,7 +21,7 @@ class ThreadManager; class GPUAsynch : public Tegra::GPU { public: explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); - ~GPUAsynch(); + ~GPUAsynch() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers( diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 658f683e2..7d5a241ff 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -16,7 +16,7 @@ namespace VideoCommon { class GPUSynch : public Tegra::GPU { public: explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); - ~GPUSynch(); + ~GPUSynch() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers( diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2ad8214cc..edb148b14 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -113,9 +113,6 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(VAddr addr, u64 size); - /// Waits the caller until the GPU thread is idle, used for synchronization - void WaitForIdle(); - private: /// Pushes a command to be executed by the GPU thread void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); @@ -127,10 +124,10 @@ private: private: SynchState state; - std::thread thread; - std::thread::id thread_id; VideoCore::RendererBase& renderer; Tegra::DmaPusher& dma_pusher; + std::thread thread; + std::thread::id thread_id; }; } // namespace VideoCommon::GPUThread diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 168288088..824863561 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -118,7 +118,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::Syst glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); - LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); + LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); CheckExtensions(); } @@ -177,7 +177,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { continue; const auto& buffer = regs.vertex_array[attrib.buffer]; - LOG_TRACE(HW_GPU, + LOG_TRACE(Render_OpenGL, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); @@ -343,9 +343,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { shader_program_manager->UseProgrammableFragmentShader(program_handle); break; default: - LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, - shader_config.enable.Value(), shader_config.offset); - UNREACHABLE(); + UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, + shader_config.enable.Value(), shader_config.offset); } const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); @@ -793,7 +792,10 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); + + if (params.pixel_format != pixel_format) { + LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); + } screen_info.display_texture = surface->Texture().handle; @@ -802,104 +804,87 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SamplerInfo::Create() { sampler.Create(); - mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear; - wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap; - uses_depth_compare = false; + mag_filter = Tegra::Texture::TextureFilter::Linear; + min_filter = Tegra::Texture::TextureFilter::Linear; + wrap_u = Tegra::Texture::WrapMode::Wrap; + wrap_v = Tegra::Texture::WrapMode::Wrap; + wrap_p = Tegra::Texture::WrapMode::Wrap; + use_depth_compare = false; depth_compare_func = Tegra::Texture::DepthCompareFunc::Never; - // default is GL_LINEAR_MIPMAP_LINEAR + // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - // Other attributes have correct defaults glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER); + + // Other attributes have correct defaults } void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) { - const GLuint s = sampler.handle; + const GLuint sampler_id = sampler.handle; if (mag_filter != config.mag_filter) { mag_filter = config.mag_filter; glSamplerParameteri( - s, GL_TEXTURE_MAG_FILTER, + sampler_id, GL_TEXTURE_MAG_FILTER, MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None)); } - if (min_filter != config.min_filter || mip_filter != config.mip_filter) { + if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) { min_filter = config.min_filter; - mip_filter = config.mip_filter; - glSamplerParameteri(s, GL_TEXTURE_MIN_FILTER, - MaxwellToGL::TextureFilterMode(min_filter, mip_filter)); + mipmap_filter = config.mipmap_filter; + glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, + MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter)); } if (wrap_u != config.wrap_u) { wrap_u = config.wrap_u; - glSamplerParameteri(s, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u)); } if (wrap_v != config.wrap_v) { wrap_v = config.wrap_v; - glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v)); } if (wrap_p != config.wrap_p) { wrap_p = config.wrap_p; - glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); + glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p)); } - if (uses_depth_compare != (config.depth_compare_enabled == 1)) { - uses_depth_compare = (config.depth_compare_enabled == 1); - if (uses_depth_compare) { - glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE); - } else { - glSamplerParameteri(s, GL_TEXTURE_COMPARE_MODE, GL_NONE); - } + if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) { + use_depth_compare = enabled; + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE, + use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE); } if (depth_compare_func != config.depth_compare_func) { depth_compare_func = config.depth_compare_func; - glSamplerParameteri(s, GL_TEXTURE_COMPARE_FUNC, + glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC, MaxwellToGL::DepthCompareFunc(depth_compare_func)); } - GLvec4 new_border_color; - if (config.srgb_conversion) { - new_border_color[0] = config.srgb_border_color_r / 255.0f; - new_border_color[1] = config.srgb_border_color_g / 255.0f; - new_border_color[2] = config.srgb_border_color_g / 255.0f; - } else { - new_border_color[0] = config.border_color_r; - new_border_color[1] = config.border_color_g; - new_border_color[2] = config.border_color_b; - } - new_border_color[3] = config.border_color_a; - - if (border_color != new_border_color) { + if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) { border_color = new_border_color; - glSamplerParameterfv(s, GL_TEXTURE_BORDER_COLOR, border_color.data()); + glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data()); } - const float anisotropic_max = static_cast<float>(1 << config.max_anisotropy.Value()); - if (anisotropic_max != max_anisotropic) { - max_anisotropic = anisotropic_max; + if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) { + max_anisotropic = anisotropic; if (GLAD_GL_ARB_texture_filter_anisotropic) { - glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic); } else if (GLAD_GL_EXT_texture_filter_anisotropic) { - glSamplerParameterf(s, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic); } } - const float lod_min = static_cast<float>(config.min_lod_clamp.Value()) / 256.0f; - if (lod_min != min_lod) { - min_lod = lod_min; - glSamplerParameterf(s, GL_TEXTURE_MIN_LOD, min_lod); - } - const float lod_max = static_cast<float>(config.max_lod_clamp.Value()) / 256.0f; - if (lod_max != max_lod) { - max_lod = lod_max; - glSamplerParameterf(s, GL_TEXTURE_MAX_LOD, max_lod); + if (const float min = config.GetMinLod(); min_lod != min) { + min_lod = min; + glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod); } - const u32 bias = config.mip_lod_bias.Value(); - // Sign extend the 13-bit value. - constexpr u32 mask = 1U << (13 - 1); - const float bias_lod = static_cast<s32>((bias ^ mask) - mask) / 256.f; - if (lod_bias != bias_lod) { - lod_bias = bias_lod; - glSamplerParameterf(s, GL_TEXTURE_LOD_BIAS, lod_bias); + if (const float max = config.GetMaxLod(); max_lod != max) { + max_lod = max; + glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod); + } + + if (const float bias = config.GetLodBias(); lod_bias != bias) { + lod_bias = bias; + glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias); } } @@ -939,8 +924,8 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader size = buffer.size; if (size > MaxConstbufferSize) { - LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); size = MaxConstbufferSize; } } else { @@ -1000,10 +985,9 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); - Surface surface = res_cache.GetTextureSurface(texture, entry); - if (surface != nullptr) { + if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) { state.texture_units[current_bindpoint].texture = - entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; + surface->Texture(entry.IsArray()).handle; surface->UpdateSwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, texture.tic.w_source); } else { @@ -1235,11 +1219,7 @@ void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { void RasterizerOpenGL::SyncTransformFeedback() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - - if (regs.tfb_enabled != 0) { - LOG_CRITICAL(Render_OpenGL, "Transform feedbacks are not implemented"); - UNREACHABLE(); - } + UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); } void RasterizerOpenGL::SyncPointState() { @@ -1259,12 +1239,8 @@ void RasterizerOpenGL::SyncPolygonOffset() { void RasterizerOpenGL::CheckAlphaTests() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - - if (regs.alpha_test_enabled != 0 && regs.rt_control.count > 1) { - LOG_CRITICAL(Render_OpenGL, "Alpha Testing is enabled with Multiple Render Targets, " - "this behavior is undefined."); - UNREACHABLE(); - } + UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, + "Alpha Testing is enabled with more than one rendertarget"); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2f0524f85..7e63f8008 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -94,11 +94,12 @@ private: private: Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest; Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest; - Tegra::Texture::TextureMipmapFilter mip_filter = Tegra::Texture::TextureMipmapFilter::None; + Tegra::Texture::TextureMipmapFilter mipmap_filter = + Tegra::Texture::TextureMipmapFilter::None; Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge; Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge; Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge; - bool uses_depth_compare = false; + bool use_depth_compare = false; Tegra::Texture::DepthCompareFunc depth_compare_func = Tegra::Texture::DepthCompareFunc::Always; GLvec4 border_color = {}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 876698b37..e9eb6e921 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -400,6 +400,27 @@ static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType return format; } +/// Returns the discrepant array target +constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { + switch (target) { + case SurfaceTarget::Texture1D: + return GL_TEXTURE_1D_ARRAY; + case SurfaceTarget::Texture2D: + return GL_TEXTURE_2D_ARRAY; + case SurfaceTarget::Texture3D: + return GL_NONE; + case SurfaceTarget::Texture1DArray: + return GL_TEXTURE_1D; + case SurfaceTarget::Texture2DArray: + return GL_TEXTURE_2D; + case SurfaceTarget::TextureCubemap: + return GL_TEXTURE_CUBE_MAP_ARRAY; + case SurfaceTarget::TextureCubeArray: + return GL_TEXTURE_CUBE_MAP; + } + return GL_NONE; +} + Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; if (IsPixelFormatASTC(pixel_format)) { @@ -795,20 +816,22 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); } -void CachedSurface::EnsureTextureView() { - if (texture_view.handle != 0) +void CachedSurface::EnsureTextureDiscrepantView() { + if (discrepant_view.handle != 0) return; - const GLenum target{TargetLayer()}; + const GLenum target{GetArrayDiscrepantTarget(params.target)}; + ASSERT(target != GL_NONE); + const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; constexpr GLuint min_layer = 0; constexpr GLuint min_level = 0; - glGenTextures(1, &texture_view.handle); - glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, min_level, + glGenTextures(1, &discrepant_view.handle); + glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, params.max_mip_level, min_layer, num_layers); - ApplyTextureDefaults(texture_view.handle, params.max_mip_level); - glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, + ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); + glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, reinterpret_cast<const GLint*>(swizzle.data())); } @@ -834,8 +857,8 @@ void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, swizzle = {new_x, new_y, new_z, new_w}; const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); - if (texture_view.handle != 0) { - glTextureParameteriv(texture_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); + if (discrepant_view.handle != 0) { + glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 797bbdc9c..9cf6f50be 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -367,31 +367,19 @@ public: return texture; } - const OGLTexture& TextureLayer() { - if (params.is_array) { - return Texture(); + const OGLTexture& Texture(bool as_array) { + if (params.is_array == as_array) { + return texture; + } else { + EnsureTextureDiscrepantView(); + return discrepant_view; } - EnsureTextureView(); - return texture_view; } GLenum Target() const { return gl_target; } - GLenum TargetLayer() const { - using VideoCore::Surface::SurfaceTarget; - switch (params.target) { - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP_ARRAY; - } - return Target(); - } - const SurfaceParams& GetSurfaceParams() const { return params; } @@ -431,10 +419,10 @@ public: private: void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle); - void EnsureTextureView(); + void EnsureTextureDiscrepantView(); OGLTexture texture; - OGLTexture texture_view; + OGLTexture discrepant_view; std::vector<std::vector<u8>> gl_buffer; SurfaceParams params{}; GLenum gl_target{}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 72ff6ac6a..11d1169f0 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -5,7 +5,9 @@ #include <array> #include <string> #include <string_view> +#include <utility> #include <variant> +#include <vector> #include <fmt/format.h> @@ -717,7 +719,7 @@ private: } std::string GenerateTexture(Operation operation, const std::string& func, - bool is_extra_int = false) { + const std::vector<std::pair<Type, Node>>& extras) { constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); @@ -738,36 +740,47 @@ private: expr += Visit(operation[i]); const std::size_t next = i + 1; - if (next < count || has_array || has_shadow) + if (next < count) expr += ", "; } if (has_array) { - expr += "float(ftoi(" + Visit(meta->array) + "))"; + expr += ", float(ftoi(" + Visit(meta->array) + "))"; } if (has_shadow) { - if (has_array) - expr += ", "; - expr += Visit(meta->depth_compare); + expr += ", " + Visit(meta->depth_compare); } expr += ')'; - for (const Node extra : meta->extras) { + for (const auto& extra_pair : extras) { + const auto [type, operand] = extra_pair; + if (operand == nullptr) { + continue; + } expr += ", "; - if (is_extra_int) { - if (const auto immediate = std::get_if<ImmediateNode>(extra)) { + + switch (type) { + case Type::Int: + if (const auto immediate = std::get_if<ImmediateNode>(operand)) { // Inline the string as an immediate integer in GLSL (some extra arguments are // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); } else { - expr += "ftoi(" + Visit(extra) + ')'; + expr += "ftoi(" + Visit(operand) + ')'; } - } else { - expr += Visit(extra); + break; + case Type::Float: + expr += Visit(operand); + break; + default: { + const auto type_int = static_cast<u32>(type); + UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); + expr += '0'; + break; + } } } - expr += ')'; - return expr; + return expr + ')'; } std::string Assign(Operation operation) { @@ -1146,7 +1159,7 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "texture"); + std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1157,7 +1170,7 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - std::string expr = GenerateTexture(operation, "textureLod"); + std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); if (meta->sampler.IsShadow()) { expr = "vec4(" + expr + ')'; } @@ -1168,7 +1181,8 @@ private: const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); ASSERT(meta); - return GenerateTexture(operation, "textureGather", !meta->sampler.IsShadow()) + + const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; + return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + GetSwizzle(meta->element); } @@ -1197,8 +1211,8 @@ private: ASSERT(meta); if (meta->element < 2) { - return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + - GetSwizzle(meta->element) + "))"; + return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + + " * vec2(256))" + GetSwizzle(meta->element) + "))"; } return "0"; } @@ -1224,9 +1238,9 @@ private: else if (next < count) expr += ", "; } - for (std::size_t i = 0; i < meta->extras.size(); ++i) { + if (meta->lod) { expr += ", "; - expr += CastOperand(Visit(meta->extras.at(i)), Type::Int); + expr += CastOperand(Visit(meta->lod), Type::Int); } expr += ')'; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp new file mode 100644 index 000000000..34bf26ff2 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -0,0 +1,483 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/surface.h" + +namespace Vulkan::MaxwellToVK { + +namespace Sampler { + +vk::Filter Filter(Tegra::Texture::TextureFilter filter) { + switch (filter) { + case Tegra::Texture::TextureFilter::Linear: + return vk::Filter::eLinear; + case Tegra::Texture::TextureFilter::Nearest: + return vk::Filter::eNearest; + } + UNIMPLEMENTED_MSG("Unimplemented sampler filter={}", static_cast<u32>(filter)); + return {}; +} + +vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter) { + switch (mipmap_filter) { + case Tegra::Texture::TextureMipmapFilter::None: + // TODO(Rodrigo): None seems to be mapped to OpenGL's mag and min filters without mipmapping + // (e.g. GL_NEAREST and GL_LINEAR). Vulkan doesn't have such a thing, find out if we have to + // use an image view with a single mipmap level to emulate this. + return vk::SamplerMipmapMode::eLinear; + case Tegra::Texture::TextureMipmapFilter::Linear: + return vk::SamplerMipmapMode::eLinear; + case Tegra::Texture::TextureMipmapFilter::Nearest: + return vk::SamplerMipmapMode::eNearest; + } + UNIMPLEMENTED_MSG("Unimplemented sampler mipmap mode={}", static_cast<u32>(mipmap_filter)); + return {}; +} + +vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode) { + switch (wrap_mode) { + case Tegra::Texture::WrapMode::Wrap: + return vk::SamplerAddressMode::eRepeat; + case Tegra::Texture::WrapMode::Mirror: + return vk::SamplerAddressMode::eMirroredRepeat; + case Tegra::Texture::WrapMode::ClampToEdge: + return vk::SamplerAddressMode::eClampToEdge; + case Tegra::Texture::WrapMode::Border: + return vk::SamplerAddressMode::eClampToBorder; + case Tegra::Texture::WrapMode::ClampOGL: + // TODO(Rodrigo): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use + // eClampToBorder to get the border color of the texture, and then sample the edge to + // manually mix them. However the shader part of this is not yet implemented. + return vk::SamplerAddressMode::eClampToBorder; + case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: + return vk::SamplerAddressMode::eMirrorClampToEdge; + case Tegra::Texture::WrapMode::MirrorOnceBorder: + UNIMPLEMENTED(); + return vk::SamplerAddressMode::eMirrorClampToEdge; + } + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; +} + +vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { + switch (depth_compare_func) { + case Tegra::Texture::DepthCompareFunc::Never: + return vk::CompareOp::eNever; + case Tegra::Texture::DepthCompareFunc::Less: + return vk::CompareOp::eLess; + case Tegra::Texture::DepthCompareFunc::LessEqual: + return vk::CompareOp::eLessOrEqual; + case Tegra::Texture::DepthCompareFunc::Equal: + return vk::CompareOp::eEqual; + case Tegra::Texture::DepthCompareFunc::NotEqual: + return vk::CompareOp::eNotEqual; + case Tegra::Texture::DepthCompareFunc::Greater: + return vk::CompareOp::eGreater; + case Tegra::Texture::DepthCompareFunc::GreaterEqual: + return vk::CompareOp::eGreaterOrEqual; + case Tegra::Texture::DepthCompareFunc::Always: + return vk::CompareOp::eAlways; + } + UNIMPLEMENTED_MSG("Unimplemented sampler depth compare function={}", + static_cast<u32>(depth_compare_func)); + return {}; +} + +} // namespace Sampler + +struct FormatTuple { + vk::Format format; ///< Vulkan format + ComponentType component_type; ///< Abstracted component type + bool attachable; ///< True when this format can be used as an attachment +}; + +static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ + {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true}, // ABGR8U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ABGR8UI + {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false}, // B5G6R5U + {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // A1B5G5R5U + {vk::Format::eR8Unorm, ComponentType::UNorm, true}, // R8U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R8UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA16UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R11FG11FB10F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32UI + {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false}, // DXT1 + {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false}, // DXT23 + {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false}, // DXT45 + {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false}, // DXN1 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2UNORM + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXN2SNORM + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_UF16 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC6H_SF16 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGBA32F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R16I + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16F + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16I + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG16S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RGB32F + {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true}, // RGBA8_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8U + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG8S + {vk::Format::eUndefined, ComponentType::Invalid, false}, // RG32UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // R32UI + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4 + + // Compressed sRGB formats + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8 + {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB + + // Depth formats + {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F + {vk::Format::eD16Unorm, ComponentType::UNorm, true}, // Z16 + + // DepthStencil formats + {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8 + {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated) + {vk::Format::eUndefined, ComponentType::Invalid, false}, // Z32FS8 +}}; + +static constexpr bool IsZetaFormat(PixelFormat pixel_format) { + return pixel_format >= PixelFormat::MaxColorFormat && + pixel_format < PixelFormat::MaxDepthStencilFormat; +} + +std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, + PixelFormat pixel_format, ComponentType component_type) { + ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); + + const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)]; + UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined, + "Unimplemented texture format with pixel format={} and component type={}", + static_cast<u32>(pixel_format), static_cast<u32>(component_type)); + ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch"); + + auto usage = vk::FormatFeatureFlagBits::eSampledImage | + vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc; + if (tuple.attachable) { + usage |= IsZetaFormat(pixel_format) ? vk::FormatFeatureFlagBits::eDepthStencilAttachment + : vk::FormatFeatureFlagBits::eColorAttachment; + } + return {device.GetSupportedFormat(tuple.format, usage, format_type), tuple.attachable}; +} + +vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage) { + switch (stage) { + case Maxwell::ShaderStage::Vertex: + return vk::ShaderStageFlagBits::eVertex; + case Maxwell::ShaderStage::TesselationControl: + return vk::ShaderStageFlagBits::eTessellationControl; + case Maxwell::ShaderStage::TesselationEval: + return vk::ShaderStageFlagBits::eTessellationEvaluation; + case Maxwell::ShaderStage::Geometry: + return vk::ShaderStageFlagBits::eGeometry; + case Maxwell::ShaderStage::Fragment: + return vk::ShaderStageFlagBits::eFragment; + } + UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); + return {}; +} + +vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return vk::PrimitiveTopology::ePointList; + case Maxwell::PrimitiveTopology::Lines: + return vk::PrimitiveTopology::eLineList; + case Maxwell::PrimitiveTopology::LineStrip: + return vk::PrimitiveTopology::eLineStrip; + case Maxwell::PrimitiveTopology::Triangles: + return vk::PrimitiveTopology::eTriangleList; + case Maxwell::PrimitiveTopology::TriangleStrip: + return vk::PrimitiveTopology::eTriangleStrip; + } + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; +} + +vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { + switch (type) { + case Maxwell::VertexAttribute::Type::SignedNorm: + break; + case Maxwell::VertexAttribute::Type::UnsignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return vk::Format::eR8G8B8A8Unorm; + default: + break; + } + break; + case Maxwell::VertexAttribute::Type::SignedInt: + break; + case Maxwell::VertexAttribute::Type::UnsignedInt: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_32: + return vk::Format::eR32Uint; + default: + break; + } + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedScaled: + break; + case Maxwell::VertexAttribute::Type::Float: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return vk::Format::eR32G32B32A32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return vk::Format::eR32G32B32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32_32: + return vk::Format::eR32G32Sfloat; + case Maxwell::VertexAttribute::Size::Size_32: + return vk::Format::eR32Sfloat; + default: + break; + } + break; + } + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", static_cast<u32>(type), + static_cast<u32>(size)); + return {}; +} + +vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return vk::CompareOp::eNever; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return vk::CompareOp::eLess; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return vk::CompareOp::eEqual; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return vk::CompareOp::eLessOrEqual; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return vk::CompareOp::eGreater; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return vk::CompareOp::eNotEqual; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return vk::CompareOp::eGreaterOrEqual; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return vk::CompareOp::eAlways; + } + UNIMPLEMENTED_MSG("Unimplemented comparison op={}", static_cast<u32>(comparison)); + return {}; +} + +vk::IndexType IndexFormat(Maxwell::IndexFormat index_format) { + switch (index_format) { + case Maxwell::IndexFormat::UnsignedByte: + UNIMPLEMENTED_MSG("Vulkan does not support native u8 index format"); + return vk::IndexType::eUint16; + case Maxwell::IndexFormat::UnsignedShort: + return vk::IndexType::eUint16; + case Maxwell::IndexFormat::UnsignedInt: + return vk::IndexType::eUint32; + } + UNIMPLEMENTED_MSG("Unimplemented index_format={}", static_cast<u32>(index_format)); + return {}; +} + +vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op) { + switch (stencil_op) { + case Maxwell::StencilOp::Keep: + case Maxwell::StencilOp::KeepOGL: + return vk::StencilOp::eKeep; + case Maxwell::StencilOp::Zero: + case Maxwell::StencilOp::ZeroOGL: + return vk::StencilOp::eZero; + case Maxwell::StencilOp::Replace: + case Maxwell::StencilOp::ReplaceOGL: + return vk::StencilOp::eReplace; + case Maxwell::StencilOp::Incr: + case Maxwell::StencilOp::IncrOGL: + return vk::StencilOp::eIncrementAndClamp; + case Maxwell::StencilOp::Decr: + case Maxwell::StencilOp::DecrOGL: + return vk::StencilOp::eDecrementAndClamp; + case Maxwell::StencilOp::Invert: + case Maxwell::StencilOp::InvertOGL: + return vk::StencilOp::eInvert; + case Maxwell::StencilOp::IncrWrap: + case Maxwell::StencilOp::IncrWrapOGL: + return vk::StencilOp::eIncrementAndWrap; + case Maxwell::StencilOp::DecrWrap: + case Maxwell::StencilOp::DecrWrapOGL: + return vk::StencilOp::eDecrementAndWrap; + } + UNIMPLEMENTED_MSG("Unimplemented stencil op={}", static_cast<u32>(stencil_op)); + return {}; +} + +vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation) { + switch (equation) { + case Maxwell::Blend::Equation::Add: + case Maxwell::Blend::Equation::AddGL: + return vk::BlendOp::eAdd; + case Maxwell::Blend::Equation::Subtract: + case Maxwell::Blend::Equation::SubtractGL: + return vk::BlendOp::eSubtract; + case Maxwell::Blend::Equation::ReverseSubtract: + case Maxwell::Blend::Equation::ReverseSubtractGL: + return vk::BlendOp::eReverseSubtract; + case Maxwell::Blend::Equation::Min: + case Maxwell::Blend::Equation::MinGL: + return vk::BlendOp::eMin; + case Maxwell::Blend::Equation::Max: + case Maxwell::Blend::Equation::MaxGL: + return vk::BlendOp::eMax; + } + UNIMPLEMENTED_MSG("Unimplemented blend equation={}", static_cast<u32>(equation)); + return {}; +} + +vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { + switch (factor) { + case Maxwell::Blend::Factor::Zero: + case Maxwell::Blend::Factor::ZeroGL: + return vk::BlendFactor::eZero; + case Maxwell::Blend::Factor::One: + case Maxwell::Blend::Factor::OneGL: + return vk::BlendFactor::eOne; + case Maxwell::Blend::Factor::SourceColor: + case Maxwell::Blend::Factor::SourceColorGL: + return vk::BlendFactor::eSrcColor; + case Maxwell::Blend::Factor::OneMinusSourceColor: + case Maxwell::Blend::Factor::OneMinusSourceColorGL: + return vk::BlendFactor::eOneMinusSrcColor; + case Maxwell::Blend::Factor::SourceAlpha: + case Maxwell::Blend::Factor::SourceAlphaGL: + return vk::BlendFactor::eSrcAlpha; + case Maxwell::Blend::Factor::OneMinusSourceAlpha: + case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: + return vk::BlendFactor::eOneMinusSrcAlpha; + case Maxwell::Blend::Factor::DestAlpha: + case Maxwell::Blend::Factor::DestAlphaGL: + return vk::BlendFactor::eDstAlpha; + case Maxwell::Blend::Factor::OneMinusDestAlpha: + case Maxwell::Blend::Factor::OneMinusDestAlphaGL: + return vk::BlendFactor::eOneMinusDstAlpha; + case Maxwell::Blend::Factor::DestColor: + case Maxwell::Blend::Factor::DestColorGL: + return vk::BlendFactor::eDstColor; + case Maxwell::Blend::Factor::OneMinusDestColor: + case Maxwell::Blend::Factor::OneMinusDestColorGL: + return vk::BlendFactor::eOneMinusDstColor; + case Maxwell::Blend::Factor::SourceAlphaSaturate: + case Maxwell::Blend::Factor::SourceAlphaSaturateGL: + return vk::BlendFactor::eSrcAlphaSaturate; + case Maxwell::Blend::Factor::Source1Color: + case Maxwell::Blend::Factor::Source1ColorGL: + return vk::BlendFactor::eSrc1Color; + case Maxwell::Blend::Factor::OneMinusSource1Color: + case Maxwell::Blend::Factor::OneMinusSource1ColorGL: + return vk::BlendFactor::eOneMinusSrc1Color; + case Maxwell::Blend::Factor::Source1Alpha: + case Maxwell::Blend::Factor::Source1AlphaGL: + return vk::BlendFactor::eSrc1Alpha; + case Maxwell::Blend::Factor::OneMinusSource1Alpha: + case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: + return vk::BlendFactor::eOneMinusSrc1Alpha; + case Maxwell::Blend::Factor::ConstantColor: + case Maxwell::Blend::Factor::ConstantColorGL: + return vk::BlendFactor::eConstantColor; + case Maxwell::Blend::Factor::OneMinusConstantColor: + case Maxwell::Blend::Factor::OneMinusConstantColorGL: + return vk::BlendFactor::eOneMinusConstantColor; + case Maxwell::Blend::Factor::ConstantAlpha: + case Maxwell::Blend::Factor::ConstantAlphaGL: + return vk::BlendFactor::eConstantAlpha; + case Maxwell::Blend::Factor::OneMinusConstantAlpha: + case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: + return vk::BlendFactor::eOneMinusConstantAlpha; + } + UNIMPLEMENTED_MSG("Unimplemented blend factor={}", static_cast<u32>(factor)); + return {}; +} + +vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { + switch (front_face) { + case Maxwell::Cull::FrontFace::ClockWise: + return vk::FrontFace::eClockwise; + case Maxwell::Cull::FrontFace::CounterClockWise: + return vk::FrontFace::eCounterClockwise; + } + UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); + return {}; +} + +vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { + switch (cull_face) { + case Maxwell::Cull::CullFace::Front: + return vk::CullModeFlagBits::eFront; + case Maxwell::Cull::CullFace::Back: + return vk::CullModeFlagBits::eBack; + case Maxwell::Cull::CullFace::FrontAndBack: + return vk::CullModeFlagBits::eFrontAndBack; + } + UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); + return {}; +} + +vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) { + switch (swizzle) { + case Tegra::Texture::SwizzleSource::Zero: + return vk::ComponentSwizzle::eZero; + case Tegra::Texture::SwizzleSource::R: + return vk::ComponentSwizzle::eR; + case Tegra::Texture::SwizzleSource::G: + return vk::ComponentSwizzle::eG; + case Tegra::Texture::SwizzleSource::B: + return vk::ComponentSwizzle::eB; + case Tegra::Texture::SwizzleSource::A: + return vk::ComponentSwizzle::eA; + case Tegra::Texture::SwizzleSource::OneInt: + case Tegra::Texture::SwizzleSource::OneFloat: + return vk::ComponentSwizzle::eOne; + } + UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", static_cast<u32>(swizzle)); + return {}; +} + +} // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h new file mode 100644 index 000000000..4cadc0721 --- /dev/null +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -0,0 +1,58 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <utility> +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/surface.h" +#include "video_core/textures/texture.h" + +namespace Vulkan::MaxwellToVK { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using PixelFormat = VideoCore::Surface::PixelFormat; +using ComponentType = VideoCore::Surface::ComponentType; + +namespace Sampler { + +vk::Filter Filter(Tegra::Texture::TextureFilter filter); + +vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); + +vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode); + +vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); + +} // namespace Sampler + +std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type, + PixelFormat pixel_format, ComponentType component_type); + +vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage); + +vk::PrimitiveTopology PrimitiveTopology(Maxwell::PrimitiveTopology topology); + +vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size); + +vk::CompareOp ComparisonOp(Maxwell::ComparisonOp comparison); + +vk::IndexType IndexFormat(Maxwell::IndexFormat index_format); + +vk::StencilOp StencilOp(Maxwell::StencilOp stencil_op); + +vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); + +vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); + +vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); + +vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); + +vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); + +} // namespace Vulkan::MaxwellToVK diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 78a4e5f0e..00242ecbe 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -122,8 +122,7 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { - LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", - static_cast<u32>(wanted_format)); + LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); UNREACHABLE(); return true; } @@ -219,11 +218,19 @@ std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( format_properties.emplace(format, physical.getFormatProperties(format, dldi)); }; AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); - AddFormatQuery(vk::Format::eR5G6B5UnormPack16); + AddFormatQuery(vk::Format::eB5G6R5UnormPack16); + AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); + AddFormatQuery(vk::Format::eR8G8B8A8Srgb); + AddFormatQuery(vk::Format::eR8Unorm); AddFormatQuery(vk::Format::eD32Sfloat); + AddFormatQuery(vk::Format::eD16Unorm); AddFormatQuery(vk::Format::eD16UnormS8Uint); AddFormatQuery(vk::Format::eD24UnormS8Uint); AddFormatQuery(vk::Format::eD32SfloatS8Uint); + AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); + AddFormatQuery(vk::Format::eBc2UnormBlock); + AddFormatQuery(vk::Format::eBc3UnormBlock); + AddFormatQuery(vk::Format::eBc4UnormBlock); return format_properties; } diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 740ac3118..e4c438792 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, + {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 38f01ca50..ea3c71eed 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -17,24 +17,6 @@ using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; -using Tegra::Shader::TextureMiscMode; -using Tegra::Shader::TextureProcessMode; -using Tegra::Shader::TextureType; - -static std::size_t GetCoordCount(TextureType texture_type) { - switch (texture_type) { - case TextureType::Texture1D: - return 1; - case TextureType::Texture2D: - return 2; - case TextureType::Texture3D: - case TextureType::TextureCube: - return 3; - default: - UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); - return 0; - } -} u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } - case OpCode::Id::TEX: { - UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - - if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); - } - - const TextureType texture_type{instr.tex.texture_type}; - const bool is_array = instr.tex.array != 0; - const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.tex.GetTextureProcessMode(); - WriteTexInstructionFloat( - bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); - break; - } - case OpCode::Id::TEXS: { - const TextureType texture_type{instr.texs.GetTextureType()}; - const bool is_array{instr.texs.IsArrayTexture()}; - const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); - const auto process_mode = instr.texs.GetTextureProcessMode(); - - if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); - } - - const Node4 components = - GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); - - if (instr.texs.fp32_flag) { - WriteTexsInstructionFloat(bb, instr, components); - } else { - WriteTexsInstructionHalfFloat(bb, instr, components); - } - break; - } - case OpCode::Id::TLD4: { - ASSERT(instr.tld4.array == 0); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), - "NDV is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), - "PTP is not implemented"); - - if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); - } - - const auto texture_type = instr.tld4.texture_type.Value(); - const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); - const bool is_array = instr.tld4.array != 0; - WriteTexInstructionFloat(bb, instr, - GetTld4Code(instr, texture_type, depth_compare, is_array)); - break; - } - case OpCode::Id::TLD4S: { - UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); - } - - const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); - const Node op_a = GetRegister(instr.gpr8); - const Node op_b = GetRegister(instr.gpr20); - - // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - std::vector<Node> coords; - if (depth_compare) { - // Note: TLD4S coordinate encoding works just like TEXS's - const Node op_y = GetRegister(instr.gpr8.Value() + 1); - coords.push_back(op_a); - coords.push_back(op_y); - coords.push_back(op_b); - } else { - coords.push_back(op_a); - coords.push_back(op_b); - } - std::vector<Node> extras; - extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); - - const auto& sampler = - GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{sampler, {}, {}, extras, element}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - WriteTexsInstructionFloat(bb, instr, values); - break; - } - case OpCode::Id::TXQ: { - if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); - } - - // TODO: The new commits on the texture refactor, change the way samplers work. - // Sadly, not all texture instructions specify the type of texture their sampler - // uses. This must be fixed at a later instance. - const auto& sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); - - u32 indexer = 0; - switch (instr.txq.query_type) { - case Tegra::Shader::TextureQueryType::Dimension: { - for (u32 element = 0; element < 4; ++element) { - if (!instr.txq.IsComponentEnabled(element)) { - continue; - } - MetaTexture meta{sampler, {}, {}, {}, element}; - const Node value = - Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); - SetTemporal(bb, indexer++, value); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled texture query type: {}", - static_cast<u32>(instr.txq.query_type.Value())); - } - break; - } - case OpCode::Id::TMML: { - UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), - "NDV is not implemented"); - - if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); - } - - auto texture_type = instr.tmml.texture_type.Value(); - const bool is_array = instr.tmml.array != 0; - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); - - std::vector<Node> coords; - - // TODO: Add coordinates for different samplers once other texture types are implemented. - switch (texture_type) { - case TextureType::Texture1D: - coords.push_back(GetRegister(instr.gpr8)); - break; - case TextureType::Texture2D: - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - break; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); - - // Fallback to interpreting as a 2D texture for now - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - texture_type = TextureType::Texture2D; - } - - for (u32 element = 0; element < 2; ++element) { - auto params = coords; - MetaTexture meta{sampler, {}, {}, {}, element}; - const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporal(bb, element, value); - } - for (u32 element = 0; element < 2; ++element) { - SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); - } - - break; - } - case OpCode::Id::TLDS: { - const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; - const bool is_array{instr.tlds.IsArrayTexture()}; - - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), - "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); - - if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { - LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); - } - - WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); - break; - } default: UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); } @@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return pc; } -const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, - bool is_array, bool is_shadow) { - const auto offset = static_cast<std::size_t>(sampler.index.Value()); - - // If this sampler has already been used, return the existing mapping. - const auto itr = - std::find_if(used_samplers.begin(), used_samplers.end(), - [&](const Sampler& entry) { return entry.GetOffset() == offset; }); - if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array && - itr->IsShadow() == is_shadow); - return *itr; - } - - // Otherwise create a new mapping for this sampler - const std::size_t next_index = used_samplers.size(); - const Sampler entry{offset, next_index, type, is_array, is_shadow}; - return *used_samplers.emplace(entry).first; -} - -void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { - u32 dest_elem = 0; - for (u32 elem = 0; elem < 4; ++elem) { - if (!instr.tex.IsComponentEnabled(elem)) { - // Skip disabled components - continue; - } - SetTemporal(bb, dest_elem++, components[elem]); - } - // After writing values in temporals, move them to the real registers - for (u32 i = 0; i < dest_elem; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); - } -} - -void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { - // TEXS has two destination registers and a swizzle. The first two elements in the swizzle - // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 - - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) - continue; - SetTemporal(bb, dest_elem++, components[component]); - } - - for (u32 i = 0; i < dest_elem; ++i) { - if (i < 2) { - // Write the first two swizzle components to gpr0 and gpr0+1 - SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); - } else { - ASSERT(instr.texs.HasTwoDestinations()); - // Write the rest of the swizzle components to gpr28 and gpr28+1 - SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); - } - } -} - -void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, - const Node4& components) { - // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half - // float instruction). - - Node4 values; - u32 dest_elem = 0; - for (u32 component = 0; component < 4; ++component) { - if (!instr.texs.IsComponentEnabled(component)) - continue; - values[dest_elem++] = components[component]; - } - if (dest_elem == 0) - return; - - std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); - - const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); - if (dest_elem <= 2) { - SetRegister(bb, instr.gpr0, first_value); - return; - } - - SetTemporal(bb, 0, first_value); - SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); - - SetRegister(bb, instr.gpr0, GetTemporal(0)); - SetRegister(bb, instr.gpr28, GetTemporal(1)); -} - -Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, std::vector<Node> coords, - Node array, Node depth_compare, u32 bias_offset) { - const bool is_array = array; - const bool is_shadow = depth_compare; - - UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || - (texture_type == TextureType::TextureCube && is_array && is_shadow), - "This method is not supported."); - - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); - - const bool lod_needed = process_mode == TextureProcessMode::LZ || - process_mode == TextureProcessMode::LL || - process_mode == TextureProcessMode::LLA; - - // LOD selection (either via bias or explicit textureLod) not supported in GL for - // sampler2DArrayShadow and samplerCubeArrayShadow. - const bool gl_lod_supported = - !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || - (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); - - const OperationCode read_method = - lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture; - - UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); - - std::vector<Node> extras; - if (process_mode != TextureProcessMode::None && gl_lod_supported) { - if (process_mode == TextureProcessMode::LZ) { - extras.push_back(Immediate(0.0f)); - } else { - // If present, lod or bias are always stored in the register indexed by the gpr20 - // field with an offset depending on the usage of the other registers - extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); - } - } - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto copy_coords = coords; - MetaTexture meta{sampler, array, depth_compare, extras, element}; - values[element] = Operation(read_method, meta, std::move(copy_coords)); - } - - return values; -} - -Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( - texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - coords.push_back(GetRegister(coord_register + i)); - } - // 1D.DC in OpenGL the 2nd component is ignored. - if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { - coords.push_back(Immediate(0.0f)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc{}; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); -} - -Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, - TextureProcessMode process_mode, bool depth_compare, bool is_array) { - const bool lod_bias_enabled = - (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); - - const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( - texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - const u64 last_coord_register = - (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - const u32 bias_offset = coord_count > 2 ? 1 : 0; - - std::vector<Node> coords; - for (std::size_t i = 0; i < coord_count; ++i) { - const bool last = (i == (coord_count - 1)) && (coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - - Node dc{}; - if (depth_compare) { - // Depth is always stored in the register signaled by gpr20 or in the next register if lod - // or bias are used - const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); - dc = GetRegister(depth_register); - } - - return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); -} - -Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, - bool is_array) { - const std::size_t coord_count = GetCoordCount(texture_type); - const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used - const u64 coord_register = array_register + (is_array ? 1 : 0); - - std::vector<Node> coords; - for (size_t i = 0; i < coord_count; ++i) - coords.push_back(GetRegister(coord_register + i)); - - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element}; - values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); - } - - return values; -} - -Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { - const std::size_t type_coord_count = GetCoordCount(texture_type); - const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; - - // If enabled arrays index is always stored in the gpr8 field - const u64 array_register = instr.gpr8.Value(); - // if is array gpr20 is used - const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); - - const u64 last_coord_register = - ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array - ? static_cast<u64>(instr.gpr20.Value()) - : coord_register + 1; - - std::vector<Node> coords; - for (std::size_t i = 0; i < type_coord_count; ++i) { - const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); - coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); - } - - const Node array = is_array ? GetRegister(array_register) : nullptr; - // When lod is used always is in gpr20 - const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); - - const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); - - Node4 values; - for (u32 element = 0; element < values.size(); ++element) { - auto coords_copy = coords; - MetaTexture meta{sampler, array, {}, {lod}, element}; - values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); - } - return values; -} - -std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( - TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, - std::size_t max_coords, std::size_t max_inputs) { - const std::size_t coord_count = GetCoordCount(texture_type); - - std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); - const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); - if (total_coord_count > max_coords || total_reg_count > max_inputs) { - UNIMPLEMENTED_MSG("Unsupported Texture operation"); - total_coord_count = std::min(total_coord_count, max_coords); - } - // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. - total_coord_count += - (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; - - return {coord_count, total_coord_count}; -} - } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp new file mode 100644 index 000000000..a99ae19bf --- /dev/null +++ b/src/video_core/shader/decode/texture.cpp @@ -0,0 +1,534 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <vector> +#include <fmt/format.h> + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; +using Tegra::Shader::Register; +using Tegra::Shader::TextureMiscMode; +using Tegra::Shader::TextureProcessMode; +using Tegra::Shader::TextureType; + +static std::size_t GetCoordCount(TextureType texture_type) { + switch (texture_type) { + case TextureType::Texture1D: + return 1; + case TextureType::Texture2D: + return 2; + case TextureType::Texture3D: + case TextureType::TextureCube: + return 3; + default: + UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); + return 0; + } +} + +u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { + const Instruction instr = {program_code[pc]}; + const auto opcode = OpCode::Decode(instr); + + switch (opcode->get().GetId()) { + case OpCode::Id::TEX: { + UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + + if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); + } + + const TextureType texture_type{instr.tex.texture_type}; + const bool is_array = instr.tex.array != 0; + const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.tex.GetTextureProcessMode(); + WriteTexInstructionFloat( + bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); + break; + } + case OpCode::Id::TEXS: { + const TextureType texture_type{instr.texs.GetTextureType()}; + const bool is_array{instr.texs.IsArrayTexture()}; + const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); + const auto process_mode = instr.texs.GetTextureProcessMode(); + + if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); + } + + const Node4 components = + GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); + + if (instr.texs.fp32_flag) { + WriteTexsInstructionFloat(bb, instr, components); + } else { + WriteTexsInstructionHalfFloat(bb, instr, components); + } + break; + } + case OpCode::Id::TLD4: { + ASSERT(instr.tld4.array == 0); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), + "NDV is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), + "PTP is not implemented"); + + if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); + } + + const auto texture_type = instr.tld4.texture_type.Value(); + const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); + const bool is_array = instr.tld4.array != 0; + WriteTexInstructionFloat(bb, instr, + GetTld4Code(instr, texture_type, depth_compare, is_array)); + break; + } + case OpCode::Id::TLD4S: { + UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); + } + + const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); + const Node op_a = GetRegister(instr.gpr8); + const Node op_b = GetRegister(instr.gpr20); + + // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. + std::vector<Node> coords; + if (depth_compare) { + // Note: TLD4S coordinate encoding works just like TEXS's + const Node op_y = GetRegister(instr.gpr8.Value() + 1); + coords.push_back(op_a); + coords.push_back(op_y); + coords.push_back(op_b); + } else { + coords.push_back(op_a); + coords.push_back(op_b); + } + const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); + + const auto& sampler = + GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); + } + + WriteTexsInstructionFloat(bb, instr, values); + break; + } + case OpCode::Id::TXQ: { + if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); + } + + // TODO: The new commits on the texture refactor, change the way samplers work. + // Sadly, not all texture instructions specify the type of texture their sampler + // uses. This must be fixed at a later instance. + const auto& sampler = + GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + + u32 indexer = 0; + switch (instr.txq.query_type) { + case Tegra::Shader::TextureQueryType::Dimension: { + for (u32 element = 0; element < 4; ++element) { + if (!instr.txq.IsComponentEnabled(element)) { + continue; + } + MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + const Node value = + Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); + SetTemporal(bb, indexer++, value); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled texture query type: {}", + static_cast<u32>(instr.txq.query_type.Value())); + } + break; + } + case OpCode::Id::TMML: { + UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), + "NDV is not implemented"); + + if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); + } + + auto texture_type = instr.tmml.texture_type.Value(); + const bool is_array = instr.tmml.array != 0; + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + std::vector<Node> coords; + + // TODO: Add coordinates for different samplers once other texture types are implemented. + switch (texture_type) { + case TextureType::Texture1D: + coords.push_back(GetRegister(instr.gpr8)); + break; + case TextureType::Texture2D: + coords.push_back(GetRegister(instr.gpr8.Value() + 0)); + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + break; + default: + UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); + + // Fallback to interpreting as a 2D texture for now + coords.push_back(GetRegister(instr.gpr8.Value() + 0)); + coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + texture_type = TextureType::Texture2D; + } + + for (u32 element = 0; element < 2; ++element) { + auto params = coords; + MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; + const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); + SetTemporal(bb, element, value); + } + for (u32 element = 0; element < 2; ++element) { + SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); + } + + break; + } + case OpCode::Id::TLDS: { + const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; + const bool is_array{instr.tlds.IsArrayTexture()}; + + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), + "AOFFI is not implemented"); + UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); + + if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { + LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete"); + } + + WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); + } + + return pc; +} + +const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, + bool is_array, bool is_shadow) { + const auto offset = static_cast<std::size_t>(sampler.index.Value()); + + // If this sampler has already been used, return the existing mapping. + const auto itr = + std::find_if(used_samplers.begin(), used_samplers.end(), + [&](const Sampler& entry) { return entry.GetOffset() == offset; }); + if (itr != used_samplers.end()) { + ASSERT(itr->GetType() == type && itr->IsArray() == is_array && + itr->IsShadow() == is_shadow); + return *itr; + } + + // Otherwise create a new mapping for this sampler + const std::size_t next_index = used_samplers.size(); + const Sampler entry{offset, next_index, type, is_array, is_shadow}; + return *used_samplers.emplace(entry).first; +} + +void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { + u32 dest_elem = 0; + for (u32 elem = 0; elem < 4; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + SetTemporal(bb, dest_elem++, components[elem]); + } + // After writing values in temporals, move them to the real registers + for (u32 i = 0; i < dest_elem; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); + } +} + +void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, + const Node4& components) { + // TEXS has two destination registers and a swizzle. The first two elements in the swizzle + // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 + + u32 dest_elem = 0; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + SetTemporal(bb, dest_elem++, components[component]); + } + + for (u32 i = 0; i < dest_elem; ++i) { + if (i < 2) { + // Write the first two swizzle components to gpr0 and gpr0+1 + SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); + } else { + ASSERT(instr.texs.HasTwoDestinations()); + // Write the rest of the swizzle components to gpr28 and gpr28+1 + SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); + } + } +} + +void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, + const Node4& components) { + // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half + // float instruction). + + Node4 values; + u32 dest_elem = 0; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + values[dest_elem++] = components[component]; + } + if (dest_elem == 0) + return; + + std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); + + const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); + if (dest_elem <= 2) { + SetRegister(bb, instr.gpr0, first_value); + return; + } + + SetTemporal(bb, 0, first_value); + SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); + + SetRegister(bb, instr.gpr0, GetTemporal(0)); + SetRegister(bb, instr.gpr28, GetTemporal(1)); +} + +Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, std::vector<Node> coords, + Node array, Node depth_compare, u32 bias_offset) { + const bool is_array = array; + const bool is_shadow = depth_compare; + + UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) || + (texture_type == TextureType::TextureCube && is_array && is_shadow), + "This method is not supported."); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow); + + const bool lod_needed = process_mode == TextureProcessMode::LZ || + process_mode == TextureProcessMode::LL || + process_mode == TextureProcessMode::LLA; + + // LOD selection (either via bias or explicit textureLod) not supported in GL for + // sampler2DArrayShadow and samplerCubeArrayShadow. + const bool gl_lod_supported = + !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) || + (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow)); + + const OperationCode read_method = + (lod_needed && gl_lod_supported) ? OperationCode::TextureLod : OperationCode::Texture; + + UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported); + + Node bias = {}; + Node lod = {}; + if (process_mode != TextureProcessMode::None && gl_lod_supported) { + switch (process_mode) { + case TextureProcessMode::LZ: + lod = Immediate(0.0f); + break; + case TextureProcessMode::LB: + // If present, lod or bias are always stored in the register indexed by the gpr20 + // field with an offset depending on the usage of the other registers + bias = GetRegister(instr.gpr20.Value() + bias_offset); + break; + case TextureProcessMode::LL: + lod = GetRegister(instr.gpr20.Value() + bias_offset); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented process mode={}", static_cast<u32>(process_mode)); + break; + } + } + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto copy_coords = coords; + MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; + values[element] = Operation(read_method, meta, std::move(copy_coords)); + } + + return values; +} + +Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { + const bool lod_bias_enabled = + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + + const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::vector<Node> coords; + for (std::size_t i = 0; i < coord_count; ++i) { + coords.push_back(GetRegister(coord_register + i)); + } + // 1D.DC in OpenGL the 2nd component is ignored. + if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { + coords.push_back(Immediate(0.0f)); + } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + dc = GetRegister(depth_register); + } + + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); +} + +Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, + TextureProcessMode process_mode, bool depth_compare, bool is_array) { + const bool lod_bias_enabled = + (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); + + const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( + texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4); + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + const u64 last_coord_register = + (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + const u32 bias_offset = coord_count > 2 ? 1 : 0; + + std::vector<Node> coords; + for (std::size_t i = 0; i < coord_count; ++i) { + const bool last = (i == (coord_count - 1)) && (coord_count > 1); + coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + + Node dc{}; + if (depth_compare) { + // Depth is always stored in the register signaled by gpr20 or in the next register if lod + // or bias are used + const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); + dc = GetRegister(depth_register); + } + + return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); +} + +Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, + bool is_array) { + const std::size_t coord_count = GetCoordCount(texture_type); + const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); + const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used + const u64 coord_register = array_register + (is_array ? 1 : 0); + + std::vector<Node> coords; + for (size_t i = 0; i < coord_count; ++i) + coords.push_back(GetRegister(coord_register + i)); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; + values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); + } + + return values; +} + +Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { + const std::size_t type_coord_count = GetCoordCount(texture_type); + const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; + + // If enabled arrays index is always stored in the gpr8 field + const u64 array_register = instr.gpr8.Value(); + // if is array gpr20 is used + const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); + + const u64 last_coord_register = + ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array + ? static_cast<u64>(instr.gpr20.Value()) + : coord_register + 1; + + std::vector<Node> coords; + for (std::size_t i = 0; i < type_coord_count; ++i) { + const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); + coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); + } + + const Node array = is_array ? GetRegister(array_register) : nullptr; + // When lod is used always is in gpr20 + const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); + + const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); + + Node4 values; + for (u32 element = 0; element < values.size(); ++element) { + auto coords_copy = coords; + MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; + values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); + } + return values; +} + +std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( + TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, + std::size_t max_coords, std::size_t max_inputs) { + const std::size_t coord_count = GetCoordCount(texture_type); + + std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); + const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); + if (total_coord_count > max_coords || total_reg_count > max_inputs) { + UNIMPLEMENTED_MSG("Unsupported Texture operation"); + total_coord_count = std::min(total_coord_count, max_coords); + } + // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. + total_coord_count += + (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; + + return {coord_count, total_coord_count}; +} + +} // namespace VideoCommon::Shader
\ No newline at end of file diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 52c7f2c4e..5bc3a3900 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -290,7 +290,9 @@ struct MetaTexture { const Sampler& sampler; Node array{}; Node depth_compare{}; - std::vector<Node> extras; + Node bias{}; + Node lod{}; + Node component{}; u32 element{}; }; @@ -614,6 +616,7 @@ private: u32 DecodeHfma2(NodeBlock& bb, u32 pc); u32 DecodeConversion(NodeBlock& bb, u32 pc); u32 DecodeMemory(NodeBlock& bb, u32 pc); + u32 DecodeTexture(NodeBlock& bb, u32 pc); u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 0fc5530f2..8c278c0e2 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -4,6 +4,7 @@ #pragma once +#include <array> #include "common/assert.h" #include "common/bit_field.h" #include "common/common_funcs.h" @@ -293,7 +294,7 @@ struct TSCEntry { union { BitField<0, 2, TextureFilter> mag_filter; BitField<4, 2, TextureFilter> min_filter; - BitField<6, 2, TextureMipmapFilter> mip_filter; + BitField<6, 2, TextureMipmapFilter> mipmap_filter; BitField<9, 1, u32> cubemap_interface_filtering; BitField<12, 13, u32> mip_lod_bias; }; @@ -306,10 +307,33 @@ struct TSCEntry { BitField<12, 8, u32> srgb_border_color_g; BitField<20, 8, u32> srgb_border_color_b; }; - float border_color_r; - float border_color_g; - float border_color_b; - float border_color_a; + std::array<f32, 4> border_color; + + float GetMaxAnisotropy() const { + return static_cast<float>(1U << max_anisotropy); + } + + float GetMinLod() const { + return static_cast<float>(min_lod_clamp) / 256.0f; + } + + float GetMaxLod() const { + return static_cast<float>(max_lod_clamp) / 256.0f; + } + + float GetLodBias() const { + // Sign extend the 13-bit value. + constexpr u32 mask = 1U << (13 - 1); + return static_cast<float>((mip_lod_bias ^ mask) - mask) / 256.0f; + } + + std::array<float, 4> GetBorderColor() const { + if (srgb_conversion) { + return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f, + srgb_border_color_b / 255.0f, border_color[3]}; + } + return border_color; + } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); |