diff options
Diffstat (limited to 'src/video_core')
23 files changed, 117 insertions, 79 deletions
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 16e0697c4..1097e5c49 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -83,7 +83,7 @@ private: u32 subchannel; ///< Current subchannel u32 method_count; ///< Current method count u32 length_pending; ///< Large NI command length pending - bool non_incrementing; ///< Current command’s NI flag + bool non_incrementing; ///< Current command's NI flag }; DmaState dma_state{}; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 9989825f8..269df9437 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -217,9 +217,9 @@ enum class StoreType : u64 { Signed8 = 1, Unsigned16 = 2, Signed16 = 3, - Bytes32 = 4, - Bytes64 = 5, - Bytes128 = 6, + Bits32 = 4, + Bits64 = 5, + Bits128 = 6, }; enum class IMinMaxExchange : u64 { @@ -981,6 +981,10 @@ union Instruction { } return false; } + + bool IsComponentEnabled(std::size_t component) const { + return ((1ULL << component) & component_mask) != 0; + } } txq; union { @@ -1248,11 +1252,19 @@ union Instruction { union { BitField<20, 14, u64> offset; BitField<34, 5, u64> index; + + u64 GetOffset() const { + return offset * 4; + } } cbuf34; union { BitField<20, 16, s64> offset; BitField<36, 5, u64> index; + + s64 GetOffset() const { + return offset; + } } cbuf36; // Unsure about the size of this one. 
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index ff5310848..4c08bb148 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,11 +49,6 @@ public: return false; } - /// Attempt to use a faster method to fill a region - virtual bool AccelerateFill(const void* config) { - return false; - } - /// Attempt to use a faster method to display the framebuffer to screen virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7831bc8cc..53b52753c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -477,9 +477,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { cached_pages.add({pages_interval, delta}); } -void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb, - bool using_depth_fb, bool preserve_contents, - std::optional<std::size_t> single_color_target) { +std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers( + OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents, + std::optional<std::size_t> single_color_target) { MICROPROFILE_SCOPE(OpenGL_Framebuffer); const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& regs = gpu.regs; @@ -491,7 +491,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or // single color targets). 
This is done because the guest registers may not change but the // host framebuffer may contain different attachments - return; + return current_depth_stencil_usage; } current_framebuffer_config_state = fb_config_state; @@ -561,12 +561,14 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us depth_surface->MarkAsModified(true, res_cache); fbkey.zeta = depth_surface->Texture().handle; - fbkey.stencil_enable = regs.stencil_enable; + fbkey.stencil_enable = regs.stencil_enable && + depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil; } SetupCachedFramebuffer(fbkey, current_state); - SyncViewport(current_state); + + return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable}; } void RasterizerOpenGL::Clear() { @@ -634,8 +636,8 @@ void RasterizerOpenGL::Clear() { return; } - ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false, - regs.clear_buffers.RT.Value()); + const auto [clear_depth, clear_stencil] = ConfigureFramebuffers( + clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value()); if (regs.clear_flags.scissor) { SyncScissorTest(clear_state); } @@ -650,11 +652,11 @@ void RasterizerOpenGL::Clear() { glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); } - if (use_depth && use_stencil) { + if (clear_depth && clear_stencil) { glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil); - } else if (use_depth) { + } else if (clear_depth) { glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth); - } else if (use_stencil) { + } else if (clear_stencil) { glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil); } } @@ -781,11 +783,6 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs return true; } -bool RasterizerOpenGL::AccelerateFill(const void* config) { - UNREACHABLE(); - return true; -} - bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 
u32 pixel_stride) { if (!framebuffer_addr) { @@ -957,7 +954,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader } } else { // Buffer is accessed directly, upload just what we use - size = used_buffer.GetSize() * sizeof(float); + size = used_buffer.GetSize(); } // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a103692f9..7f2bf0f8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -56,7 +56,6 @@ public: void FlushAndInvalidateRegion(VAddr addr, u64 size) override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst) override; - bool AccelerateFill(const void* config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; @@ -122,10 +121,12 @@ private: * @param using_depth_fb If true, configure the depth/stencil framebuffer. * @param preserve_contents If true, tries to preserve data from a previously used framebuffer. * @param single_color_target Specifies if a single color buffer target should be used. + * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture + * (requires using_depth_fb to be true) */ - void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true, - bool using_depth_fb = true, bool preserve_contents = true, - std::optional<std::size_t> single_color_target = {}); + std::pair<bool, bool> ConfigureFramebuffers( + OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true, + bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); /// Configures the current constbuffers to use for the draw command. 
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, @@ -214,6 +215,7 @@ private: std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache; FramebufferConfigState current_framebuffer_config_state; + std::pair<bool, bool> current_depth_stencil_usage{}; std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 42e4e7aa1..a79eee03e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -719,7 +719,6 @@ void CachedSurface::FlushGLBuffer() { glPixelStorei(GL_PACK_ROW_LENGTH, 0); ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width, params.height); - ASSERT(params.type != SurfaceType::Fill); const u8* const texture_src_data = Memory::GetPointer(params.addr); ASSERT(texture_src_data); if (params.is_tiled) { @@ -863,9 +862,6 @@ void CachedSurface::EnsureTextureView() { MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { - if (params.type == SurfaceType::Fill) - return; - MICROPROFILE_SCOPE(OpenGL_TextureUL); for (u32 i = 0; i < params.max_mip_level; i++) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 004245431..36035d0d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -543,8 +543,9 @@ private: if (const auto immediate = std::get_if<ImmediateNode>(offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); - return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / 4, - offset_imm % 4); + ASSERT_MSG(offset_imm % 4 == 0, "Unaligned 
cbuf direct access"); + return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), + offset_imm / (4 * 4), (offset_imm / 4) % 4); } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index e7847f614..51b8d55d4 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index a237dcb92..37eef2bf2 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -35,7 +35,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 p switch (opcode->get().GetId()) { case OpCode::Id::HADD2_C: case OpCode::Id::HMUL2_C: - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); case OpCode::Id::HADD2_R: case OpCode::Id::HMUL2_R: return GetRegister(instr.gpr20); diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 4a8cc1a1c..cc9a76a19 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -26,7 +26,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3 } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, 
instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index ee18d3a99..728a393a1 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -48,7 +48,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); const bool input_signed = instr.conversion.is_input_signed; @@ -72,7 +72,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); @@ -110,7 +110,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) { if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp index be8dc2230..52f39d3ff 100644 --- a/src/video_core/shader/decode/ffma.cpp +++ b/src/video_core/shader/decode/ffma.cpp @@ -27,14 +27,14 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::FFMA_CR: { - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; } case OpCode::Id::FFMA_RR: return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::FFMA_RC: { return {GetRegister(instr.gpr39), - 
GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; } case OpCode::Id::FFMA_IMM: return {GetImmediate19(instr), GetRegister(instr.gpr39)}; diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp index ba846f1bd..9f9da2278 100644 --- a/src/video_core/shader/decode/float_set.cpp +++ b/src/video_core/shader/decode/float_set.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp index e88b04d18..dd3aef6f2 100644 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ b/src/video_core/shader/decode/float_set_predicate.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u3 } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp index 4a6b945f9..43a0a9e10 100644 --- a/src/video_core/shader/decode/hfma2.cpp +++ b/src/video_core/shader/decode/hfma2.cpp @@ -39,13 +39,14 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) { neg_b = instr.hfma2.negate_b; neg_c = instr.hfma2.negate_c; return {instr.hfma2.saturate, instr.hfma2.type_b, - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39, - GetRegister(instr.gpr39)}; + GetConstBuffer(instr.cbuf34.index, 
instr.cbuf34.GetOffset()), + instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; case OpCode::Id::HFMA2_RC: neg_b = instr.hfma2.negate_b; neg_c = instr.hfma2.negate_c; return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), - instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + instr.hfma2.type_b, + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::HFMA2_RR: neg_b = instr.hfma2.rr.negate_b; neg_c = instr.hfma2.rr.negate_c; diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp index 85e67b03b..16eb3985f 100644 --- a/src/video_core/shader/decode/integer_set.cpp +++ b/src/video_core/shader/decode/integer_set.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp index c8b105a08..daf97174b 100644 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ b/src/video_core/shader/decode/integer_set_predicate.cpp @@ -25,7 +25,7 @@ u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 04cb386b7..3dd26da20 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -80,7 +80,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { Node index = 
GetRegister(instr.gpr8); const Node op_a = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index); + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); switch (instr.ld_c.type.Value()) { case Tegra::Shader::UniformType::Single: @@ -89,7 +89,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { case Tegra::Shader::UniformType::Double: { const Node op_b = - GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); + GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); SetTemporal(bb, 0, op_a); SetTemporal(bb, 1, op_b); @@ -104,19 +104,42 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { } case OpCode::Id::LD_L: { UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", - static_cast<unsigned>(instr.ld_l.unknown.Value())); - - const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8), - Immediate(static_cast<s32>(instr.smem_imm))); - const Node lmem = GetLocalMemory(index); + static_cast<u32>(instr.ld_l.unknown.Value())); + + const auto GetLmem = [&](s32 offset) { + ASSERT(offset % 4 == 0); + const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); + const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), + immediate_offset); + return GetLocalMemory(address); + }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bytes32: - SetRegister(bb, instr.gpr0, lmem); + case Tegra::Shader::StoreType::Bits32: + case Tegra::Shader::StoreType::Bits64: + case Tegra::Shader::StoreType::Bits128: { + const u32 count = [&]() { + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bits32: + return 1; + case Tegra::Shader::StoreType::Bits64: + return 2; + case Tegra::Shader::StoreType::Bits128: + return 4; + default: + UNREACHABLE(); + return 0; + } + }(); + for (u32 i = 0; i < count; ++i) + 
SetTemporal(bb, i, GetLmem(i * 4)); + for (u32 i = 0; i < count; ++i) + SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); break; + } default: UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", - static_cast<unsigned>(instr.ldst_sl.type.Value())); + static_cast<u32>(instr.ldst_sl.type.Value())); } break; } @@ -142,7 +165,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { ASSERT(cbuf != nullptr); const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); ASSERT(cbuf_offset_imm != nullptr); - const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4; + const auto cbuf_offset = cbuf_offset_imm->GetValue(); bb.push_back(Comment( fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); @@ -202,12 +225,20 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", static_cast<u32>(instr.st_l.unknown.Value())); - const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - Immediate(static_cast<s32>(instr.smem_imm))); + const auto GetLmemAddr = [&](s32 offset) { + ASSERT(offset % 4 == 0); + const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); + return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); + }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bytes32: - SetLocalMemory(bb, index, GetRegister(instr.gpr0)); + case Tegra::Shader::StoreType::Bits128: + SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3)); + SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2)); + case Tegra::Shader::StoreType::Bits64: + SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1)); + case Tegra::Shader::StoreType::Bits32: + SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0)); break; default: UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", @@ -324,15 +355,18 @@ u32 
ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { const auto& sampler = GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); + u32 indexer = 0; switch (instr.txq.query_type) { case Tegra::Shader::TextureQueryType::Dimension: { for (u32 element = 0; element < 4; ++element) { - MetaTexture meta{sampler, element}; - const Node value = Operation(OperationCode::F4TextureQueryDimensions, - std::move(meta), GetRegister(instr.gpr8)); - SetTemporal(bb, element, value); + if (instr.txq.IsComponentEnabled(element)) { + MetaTexture meta{sampler, element}; + const Node value = Operation(OperationCode::F4TextureQueryDimensions, + std::move(meta), GetRegister(instr.gpr8)); + SetTemporal(bb, indexer++, value); + } } - for (u32 i = 0; i < 4; ++i) { + for (u32 i = 0; i < indexer; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); } break; @@ -734,4 +768,4 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( return {coord_count, total_coord_count}; } -} // namespace VideoCommon::Shader
\ No newline at end of file +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 85026bb37..6623f8ff9 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -23,7 +23,7 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { } else if (instr.is_b_gpr) { return GetRegister(instr.gpr20); } else { - return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); } }(); diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 0cd9cd1cc..9cb864500 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -32,13 +32,14 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) { auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::XMAD_CR: - return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {instr.xmad.merge_56, + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RR: return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::XMAD_RC: return {false, GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::XMAD_IMM: return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)), GetRegister(instr.gpr39)}; diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index c4ecb2e3c..6e42e3dfb 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -249,7 +249,7 @@ public: } u32 GetSize() const { - return max_offset + 1; + return max_offset + sizeof(float); } private: diff 
--git a/src/video_core/surface.h b/src/video_core/surface.h index edd3816ba..b783e4b27 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -109,8 +109,7 @@ enum class SurfaceType { ColorTexture = 0, Depth = 1, DepthStencil = 2, - Fill = 3, - Invalid = 4, + Invalid = 3, }; enum class SurfaceTarget { diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index e7c78bee2..bdb40dacf 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -182,7 +182,7 @@ struct TICEntry { }; union { BitField<0, 16, u32> height_minus_1; - BitField<16, 15, u32> depth_minus_1; + BitField<16, 14, u32> depth_minus_1; }; union { BitField<6, 13, u32> mip_lod_bias; |