diff options
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 22 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 13 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 30 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 2 |
7 files changed, 87 insertions, 5 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 988a6433e..cc1f90de6 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -478,7 +478,9 @@ public: u32 depth_write_enabled; - INSERT_PADDING_WORDS(0x8); + INSERT_PADDING_WORDS(0x7); + + u32 d3d_cull_mode; BitField<0, 3, ComparisonOp> depth_test_func; @@ -498,7 +500,13 @@ public: u32 enable[NumRenderTargets]; } blend; - INSERT_PADDING_WORDS(0x2D); + INSERT_PADDING_WORDS(0xB); + + union { + BitField<4, 1, u32> triangle_rast_flip; + } screen_y_control; + + INSERT_PADDING_WORDS(0x21); u32 vb_element_base; @@ -528,7 +536,12 @@ public: } } tic; - INSERT_PADDING_WORDS(0x22); + INSERT_PADDING_WORDS(0x21); + + union { + BitField<2, 1, u32> coord_origin; + BitField<3, 10, u32> enable; + } point_coord_replace; struct { u32 code_address_high; @@ -818,11 +831,14 @@ ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(depth_test_enable, 0x4B3); ASSERT_REG_POSITION(independent_blend_enable, 0x4B9); ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); +ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2); ASSERT_REG_POSITION(depth_test_func, 0x4C3); ASSERT_REG_POSITION(blend, 0x4CF); +ASSERT_REG_POSITION(screen_y_control, 0x4EB); ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(tsc, 0x557); ASSERT_REG_POSITION(tic, 0x55D); +ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(index_array, 0x5F2); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index f277eaffa..da1aaeeee 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -329,6 +329,19 @@ union Instruction { } isetp; union { + BitField<0, 3, u64> pred0; + BitField<3, 3, u64> pred3; + BitField<12, 3, u64> pred12; + BitField<15, 1, u64> neg_pred12; + BitField<24, 2, PredOperation> cond; + BitField<29, 3, u64> pred29; + BitField<32, 1, u64> neg_pred29; + BitField<39, 3, u64> pred39; + BitField<42, 1, u64> neg_pred39; + BitField<45, 2, PredOperation> op; + } psetp; + + union { BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; BitField<43, 1, u64> neg_a; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8072c8cb2..bacb389e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -774,6 +774,16 @@ void RasterizerOpenGL::SyncCullMode() { if (state.cull.enabled) { state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); + + // If the GPU is configured to flip the rasterized triangles, then we need to flip the + // notion of front and back. Note: We flip the triangles when the value of the register is 0 + // because OpenGL already does it for us. + if (regs.screen_y_control.triangle_rast_flip == 0) { + if (state.cull.front_face == GL_CCW) + state.cull.front_face = GL_CW; + else if (state.cull.front_face == GL_CW) + state.cull.front_face = GL_CCW; + } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 50469c05c..57d7763ff 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -108,7 +108,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, false}, // Z24S8 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // S8Z24 + false}, // S8Z24 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -191,7 +192,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::Z24S8>, - MortonCopy<true, PixelFormat::S8Z24>, + MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), @@ -213,6 +214,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<false, PixelFormat::ABGR8>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, + MortonCopy<false, PixelFormat::Z32F>, }; // Allocate an uninitialized texture of appropriate size and format for the surface diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 8005a81b8..b4d7f8ebe 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -42,6 +42,7 @@ struct SurfaceParams { // DepthStencil formats Z24S8 = 13, S8Z24 = 14, + Z32F = 15, MaxDepthStencilFormat, @@ -94,6 +95,7 @@ struct SurfaceParams { 4, // ASTC_2D_4X4 1, // Z24S8 1, // S8Z24 + 1, // Z32F }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -120,6 +122,7 @@ struct SurfaceParams { 32, // ASTC_2D_4X4 32, // Z24S8 32, // S8Z24 + 32, // Z32F }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -135,6 +138,8 @@ struct SurfaceParams { return PixelFormat::S8Z24; case Tegra::DepthFormat::Z24_S8_UNORM: return PixelFormat::Z24S8; + case Tegra::DepthFormat::Z32_FLOAT: + return PixelFormat::Z32F; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -235,6 +240,8 @@ struct SurfaceParams { return Tegra::DepthFormat::S8_Z24_UNORM; case PixelFormat::Z24S8: return Tegra::DepthFormat::Z24_S8_UNORM; + case PixelFormat::Z32F: + return Tegra::DepthFormat::Z32_FLOAT; default: UNREACHABLE(); } @@ -284,6 +291,8 @@ struct SurfaceParams { case Tegra::DepthFormat::S8_Z24_UNORM: case Tegra::DepthFormat::Z24_S8_UNORM: return ComponentType::UNorm; + case Tegra::DepthFormat::Z32_FLOAT: + return ComponentType::Float; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ec9956edb..a4b730e1c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1477,6 +1477,36 @@ private: } break; } + case OpCode::Type::PredicateSetPredicate: { + std::string op_a = + GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); + std::string op_b = + GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); + + using Tegra::Shader::Pred; + // We can't use the constant predicate as destination. + ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); + + std::string second_pred = + GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); + + std::string combiner = GetPredicateCombiner(instr.psetp.op); + + std::string predicate = + '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; + + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(instr.psetp.pred3, + '(' + predicate + ") " + combiner + " (" + second_pred + ')'); + + if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled + SetPredicate(instr.psetp.pred0, + "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); + } + break; + } case OpCode::Type::FloatSet: { std::string op_a = instr.fset.neg_a ? "-" : ""; op_a += regs.GetRegisterAsFloat(instr.gpr8); diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 7b06fea3e..d5ab4e4f9 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -78,6 +78,7 @@ static u32 DepthBytesPerPixel(DepthFormat format) { switch (format) { case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: + case DepthFormat::Z32_FLOAT: return 4; default: UNIMPLEMENTED_MSG("Format not implemented"); @@ -132,6 +133,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid switch (format) { case DepthFormat::S8_Z24_UNORM: case DepthFormat::Z24_S8_UNORM: + case DepthFormat::Z32_FLOAT: CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, unswizzled_data.data(), true, block_height); break; |