summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/dma_pusher.h2
-rw-r--r--src/video_core/engines/shader_bytecode.h18
-rw-r--r--src/video_core/rasterizer_interface.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h10
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp5
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp2
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp2
-rw-r--r--src/video_core/shader/decode/conversion.cpp6
-rw-r--r--src/video_core/shader/decode/ffma.cpp4
-rw-r--r--src/video_core/shader/decode/float_set.cpp2
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/hfma2.cpp7
-rw-r--r--src/video_core/shader/decode/integer_set.cpp2
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp2
-rw-r--r--src/video_core/shader/decode/memory.cpp76
-rw-r--r--src/video_core/shader/decode/shift.cpp2
-rw-r--r--src/video_core/shader/decode/xmad.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h2
-rw-r--r--src/video_core/surface.h3
-rw-r--r--src/video_core/textures/texture.h2
23 files changed, 117 insertions, 79 deletions
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 16e0697c4..1097e5c49 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -83,7 +83,7 @@ private:
u32 subchannel; ///< Current subchannel
u32 method_count; ///< Current method count
u32 length_pending; ///< Large NI command length pending
- bool non_incrementing; ///< Current command’s NI flag
+ bool non_incrementing; ///< Current command's NI flag
};
DmaState dma_state{};
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9989825f8..269df9437 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -217,9 +217,9 @@ enum class StoreType : u64 {
Signed8 = 1,
Unsigned16 = 2,
Signed16 = 3,
- Bytes32 = 4,
- Bytes64 = 5,
- Bytes128 = 6,
+ Bits32 = 4,
+ Bits64 = 5,
+ Bits128 = 6,
};
enum class IMinMaxExchange : u64 {
@@ -981,6 +981,10 @@ union Instruction {
}
return false;
}
+
+ bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
+ }
} txq;
union {
@@ -1248,11 +1252,19 @@ union Instruction {
union {
BitField<20, 14, u64> offset;
BitField<34, 5, u64> index;
+
+ u64 GetOffset() const {
+ return offset * 4;
+ }
} cbuf34;
union {
BitField<20, 16, s64> offset;
BitField<36, 5, u64> index;
+
+ s64 GetOffset() const {
+ return offset;
+ }
} cbuf36;
// Unsure about the size of this one.
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index ff5310848..4c08bb148 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,11 +49,6 @@ public:
return false;
}
- /// Attempt to use a faster method to fill a region
- virtual bool AccelerateFill(const void* config) {
- return false;
- }
-
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 7831bc8cc..53b52753c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -477,9 +477,9 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
cached_pages.add({pages_interval, delta});
}
-void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool using_color_fb,
- bool using_depth_fb, bool preserve_contents,
- std::optional<std::size_t> single_color_target) {
+std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
+ OpenGLState& current_state, bool using_color_fb, bool using_depth_fb, bool preserve_contents,
+ std::optional<std::size_t> single_color_target) {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -491,7 +491,7 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
// Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
// single color targets). This is done because the guest registers may not change but the
// host framebuffer may contain different attachments
- return;
+ return current_depth_stencil_usage;
}
current_framebuffer_config_state = fb_config_state;
@@ -561,12 +561,14 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
depth_surface->MarkAsModified(true, res_cache);
fbkey.zeta = depth_surface->Texture().handle;
- fbkey.stencil_enable = regs.stencil_enable;
+ fbkey.stencil_enable = regs.stencil_enable &&
+ depth_surface->GetSurfaceParams().type == SurfaceType::DepthStencil;
}
SetupCachedFramebuffer(fbkey, current_state);
-
SyncViewport(current_state);
+
+ return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
}
void RasterizerOpenGL::Clear() {
@@ -634,8 +636,8 @@ void RasterizerOpenGL::Clear() {
return;
}
- ConfigureFramebuffers(clear_state, use_color, use_depth || use_stencil, false,
- regs.clear_buffers.RT.Value());
+ const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
+ clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
if (regs.clear_flags.scissor) {
SyncScissorTest(clear_state);
}
@@ -650,11 +652,11 @@ void RasterizerOpenGL::Clear() {
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
}
- if (use_depth && use_stencil) {
+ if (clear_depth && clear_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
- } else if (use_depth) {
+ } else if (clear_depth) {
glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
- } else if (use_stencil) {
+ } else if (clear_stencil) {
glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
}
@@ -781,11 +783,6 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs
return true;
}
-bool RasterizerOpenGL::AccelerateFill(const void* config) {
- UNREACHABLE();
- return true;
-}
-
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
@@ -957,7 +954,7 @@ void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::Shader
}
} else {
// Buffer is accessed directly, upload just what we use
- size = used_buffer.GetSize() * sizeof(float);
+ size = used_buffer.GetSize();
}
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a103692f9..7f2bf0f8b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -56,7 +56,6 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
const Tegra::Engines::Fermi2D::Regs::Surface& dst) override;
- bool AccelerateFill(const void* config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
bool AccelerateDrawBatch(bool is_indexed) override;
@@ -122,10 +121,12 @@ private:
* @param using_depth_fb If true, configure the depth/stencil framebuffer.
* @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
* @param single_color_target Specifies if a single color buffer target should be used.
+ * @returns If depth (first) or stencil (second) are being stored in the bound zeta texture
+ * (requires using_depth_fb to be true)
*/
- void ConfigureFramebuffers(OpenGLState& current_state, bool use_color_fb = true,
- bool using_depth_fb = true, bool preserve_contents = true,
- std::optional<std::size_t> single_color_target = {});
+ std::pair<bool, bool> ConfigureFramebuffers(
+ OpenGLState& current_state, bool use_color_fb = true, bool using_depth_fb = true,
+ bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
/// Configures the current constbuffers to use for the draw command.
void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
@@ -214,6 +215,7 @@ private:
std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
FramebufferConfigState current_framebuffer_config_state;
+ std::pair<bool, bool> current_depth_stencil_usage{};
std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 42e4e7aa1..a79eee03e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -719,7 +719,6 @@ void CachedSurface::FlushGLBuffer() {
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer[0], params.pixel_format, params.width,
params.height);
- ASSERT(params.type != SurfaceType::Fill);
const u8* const texture_src_data = Memory::GetPointer(params.addr);
ASSERT(texture_src_data);
if (params.is_tiled) {
@@ -863,9 +862,6 @@ void CachedSurface::EnsureTextureView() {
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
- if (params.type == SurfaceType::Fill)
- return;
-
MICROPROFILE_SCOPE(OpenGL_TextureUL);
for (u32 i = 0; i < params.max_mip_level; i++)
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 004245431..36035d0d2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -543,8 +543,9 @@ private:
if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
- return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), offset_imm / 4,
- offset_imm % 4);
+ ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
+ return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4);
} else if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index e7847f614..51b8d55d4 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a237dcb92..37eef2bf2 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -35,7 +35,7 @@ u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 p
switch (opcode->get().GetId()) {
case OpCode::Id::HADD2_C:
case OpCode::Id::HMUL2_C:
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::HADD2_R:
case OpCode::Id::HMUL2_R:
return GetRegister(instr.gpr20);
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 4a8cc1a1c..cc9a76a19 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -26,7 +26,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u3
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index ee18d3a99..728a393a1 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
const bool input_signed = instr.conversion.is_input_signed;
@@ -72,7 +72,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
@@ -110,7 +110,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index be8dc2230..52f39d3ff 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -27,14 +27,14 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) {
auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::FFMA_CR: {
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+ return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
}
case OpCode::Id::FFMA_RR:
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::FFMA_RC: {
return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
}
case OpCode::Id::FFMA_IMM:
return {GetImmediate19(instr), GetRegister(instr.gpr39)};
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index ba846f1bd..9f9da2278 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index e88b04d18..dd3aef6f2 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u3
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
index 4a6b945f9..43a0a9e10 100644
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@@ -39,13 +39,14 @@ u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_b,
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
- GetRegister(instr.gpr39)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
+ instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
case OpCode::Id::HFMA2_RC:
neg_b = instr.hfma2.negate_b;
neg_c = instr.hfma2.negate_c;
return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
- instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ instr.hfma2.type_b,
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::HFMA2_RR:
neg_b = instr.hfma2.rr.negate_b;
neg_c = instr.hfma2.rr.negate_c;
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 85e67b03b..16eb3985f 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index c8b105a08..daf97174b 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -25,7 +25,7 @@ u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code,
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 04cb386b7..3dd26da20 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -80,7 +80,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
Node index = GetRegister(instr.gpr8);
const Node op_a =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index);
+ GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
switch (instr.ld_c.type.Value()) {
case Tegra::Shader::UniformType::Single:
@@ -89,7 +89,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
case Tegra::Shader::UniformType::Double: {
const Node op_b =
- GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index);
+ GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
SetTemporal(bb, 0, op_a);
SetTemporal(bb, 1, op_b);
@@ -104,19 +104,42 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
}
case OpCode::Id::LD_L: {
UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
- static_cast<unsigned>(instr.ld_l.unknown.Value()));
-
- const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
- Immediate(static_cast<s32>(instr.smem_imm)));
- const Node lmem = GetLocalMemory(index);
+ static_cast<u32>(instr.ld_l.unknown.Value()));
+
+ const auto GetLmem = [&](s32 offset) {
+ ASSERT(offset % 4 == 0);
+ const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+ const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
+ immediate_offset);
+ return GetLocalMemory(address);
+ };
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bytes32:
- SetRegister(bb, instr.gpr0, lmem);
+ case Tegra::Shader::StoreType::Bits32:
+ case Tegra::Shader::StoreType::Bits64:
+ case Tegra::Shader::StoreType::Bits128: {
+ const u32 count = [&]() {
+ switch (instr.ldst_sl.type.Value()) {
+ case Tegra::Shader::StoreType::Bits32:
+ return 1;
+ case Tegra::Shader::StoreType::Bits64:
+ return 2;
+ case Tegra::Shader::StoreType::Bits128:
+ return 4;
+ default:
+ UNREACHABLE();
+ return 0;
+ }
+ }();
+ for (u32 i = 0; i < count; ++i)
+ SetTemporal(bb, i, GetLmem(i * 4));
+ for (u32 i = 0; i < count; ++i)
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
break;
+ }
default:
UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
- static_cast<unsigned>(instr.ldst_sl.type.Value()));
+ static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
}
@@ -142,7 +165,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
ASSERT(cbuf != nullptr);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
- const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
+ const auto cbuf_offset = cbuf_offset_imm->GetValue();
bb.push_back(Comment(
fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
@@ -202,12 +225,20 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
static_cast<u32>(instr.st_l.unknown.Value()));
- const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
- Immediate(static_cast<s32>(instr.smem_imm)));
+ const auto GetLmemAddr = [&](s32 offset) {
+ ASSERT(offset % 4 == 0);
+ const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+ return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
+ };
switch (instr.ldst_sl.type.Value()) {
- case Tegra::Shader::StoreType::Bytes32:
- SetLocalMemory(bb, index, GetRegister(instr.gpr0));
+ case Tegra::Shader::StoreType::Bits128:
+ SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
+ SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
+ case Tegra::Shader::StoreType::Bits64:
+ SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
+ case Tegra::Shader::StoreType::Bits32:
+ SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
break;
default:
UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
@@ -324,15 +355,18 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+ u32 indexer = 0;
switch (instr.txq.query_type) {
case Tegra::Shader::TextureQueryType::Dimension: {
for (u32 element = 0; element < 4; ++element) {
- MetaTexture meta{sampler, element};
- const Node value = Operation(OperationCode::F4TextureQueryDimensions,
- std::move(meta), GetRegister(instr.gpr8));
- SetTemporal(bb, element, value);
+ if (instr.txq.IsComponentEnabled(element)) {
+ MetaTexture meta{sampler, element};
+ const Node value = Operation(OperationCode::F4TextureQueryDimensions,
+ std::move(meta), GetRegister(instr.gpr8));
+ SetTemporal(bb, indexer++, value);
+ }
}
- for (u32 i = 0; i < 4; ++i) {
+ for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
@@ -734,4 +768,4 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
return {coord_count, total_coord_count};
}
-} // namespace VideoCommon::Shader \ No newline at end of file
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 85026bb37..6623f8ff9 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) {
} else if (instr.is_b_gpr) {
return GetRegister(instr.gpr20);
} else {
- return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 0cd9cd1cc..9cb864500 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -32,13 +32,14 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::XMAD_CR:
- return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+ return {instr.xmad.merge_56,
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RR:
return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::XMAD_RC:
return {false, GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::XMAD_IMM:
return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
GetRegister(instr.gpr39)};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index c4ecb2e3c..6e42e3dfb 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -249,7 +249,7 @@ public:
}
u32 GetSize() const {
- return max_offset + 1;
+ return max_offset + sizeof(float);
}
private:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index edd3816ba..b783e4b27 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -109,8 +109,7 @@ enum class SurfaceType {
ColorTexture = 0,
Depth = 1,
DepthStencil = 2,
- Fill = 3,
- Invalid = 4,
+ Invalid = 3,
};
enum class SurfaceTarget {
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e7c78bee2..bdb40dacf 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -182,7 +182,7 @@ struct TICEntry {
};
union {
BitField<0, 16, u32> height_minus_1;
- BitField<16, 15, u32> depth_minus_1;
+ BitField<16, 14, u32> depth_minus_1;
};
union {
BitField<6, 13, u32> mip_lod_bias;