diff options
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 63 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 76 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 7 |
3 files changed, 127 insertions, 19 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7357d20d1..6de07ea56 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -37,21 +37,52 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.viewport[viewport].depth_range_near = 0.0f; regs.viewport[viewport].depth_range_far = 1.0f; } + // Doom and Bomberman seems to use the uninitialized registers and just enable blend + // so initialize blend registers with sane values + regs.blend.equation_rgb = Regs::Blend::Equation::Add; + regs.blend.factor_source_rgb = Regs::Blend::Factor::One; + regs.blend.factor_dest_rgb = Regs::Blend::Factor::Zero; + regs.blend.equation_a = Regs::Blend::Equation::Add; + regs.blend.factor_source_a = Regs::Blend::Factor::One; + regs.blend.factor_dest_a = Regs::Blend::Factor::Zero; + for (std::size_t blend_index = 0; blend_index < Regs::NumRenderTargets; blend_index++) { + regs.independent_blend[blend_index].equation_rgb = Regs::Blend::Equation::Add; + regs.independent_blend[blend_index].factor_source_rgb = Regs::Blend::Factor::One; + regs.independent_blend[blend_index].factor_dest_rgb = Regs::Blend::Factor::Zero; + regs.independent_blend[blend_index].equation_a = Regs::Blend::Equation::Add; + regs.independent_blend[blend_index].factor_source_a = Regs::Blend::Factor::One; + regs.independent_blend[blend_index].factor_dest_a = Regs::Blend::Factor::Zero; + } + regs.stencil_front_op_fail = Regs::StencilOp::Keep; + regs.stencil_front_op_zfail = Regs::StencilOp::Keep; + regs.stencil_front_op_zpass = Regs::StencilOp::Keep; + regs.stencil_front_func_func = Regs::ComparisonOp::Always; + regs.stencil_front_func_mask = 0xFFFFFFFF; + regs.stencil_front_mask = 0xFFFFFFFF; + regs.stencil_two_side_enable = 1; + regs.stencil_back_op_fail = Regs::StencilOp::Keep; + regs.stencil_back_op_zfail = Regs::StencilOp::Keep; + regs.stencil_back_op_zpass = Regs::StencilOp::Keep; + regs.stencil_back_func_func = Regs::ComparisonOp::Always; + regs.stencil_back_func_mask = 0xFFFFFFFF; + regs.stencil_back_mask = 0xFFFFFFFF; } void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { // Reset the current macro. executing_macro = 0; - // The requested macro must have been uploaded already. - auto macro_code = uploaded_macros.find(method); - if (macro_code == uploaded_macros.end()) { - LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); + // Lookup the macro offset + const u32 entry{(method - MacroRegistersStart) >> 1}; + const auto& search{macro_offsets.find(entry)}; + if (search == macro_offsets.end()) { + LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method); + UNREACHABLE(); return; } // Execute the current macro. - macro_interpreter.Execute(macro_code->second, std::move(parameters)); + macro_interpreter.Execute(search->second, std::move(parameters)); } void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { @@ -90,13 +121,23 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); } - regs.reg_array[method] = value; + if (regs.reg_array[method] != value) { + regs.reg_array[method] = value; + if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) && + method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) { + dirty_flags.vertex_attrib_format = true; + } + } switch (method) { case MAXWELL3D_REG_INDEX(macros.data): { ProcessMacroUpload(value); break; } + case MAXWELL3D_REG_INDEX(macros.bind): { + ProcessMacroBind(value); + break; + } case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): @@ -158,9 +199,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) { } void Maxwell3D::ProcessMacroUpload(u32 data) { - // Store the uploaded macro code to interpret them when they're called. - auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart]; - macro.push_back(data); + ASSERT_MSG(regs.macros.upload_address < macro_memory.size(), + "upload_address exceeded macro_memory size!"); + macro_memory[regs.macros.upload_address++] = data; +} + +void Maxwell3D::ProcessMacroBind(u32 data) { + macro_offsets[regs.macros.entry] = data; } void Maxwell3D::ProcessQueryGet() { diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 443affc36..91ca57883 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -345,6 +345,14 @@ public: Invert = 6, IncrWrap = 7, DecrWrap = 8, + KeepOGL = 0x1E00, + ZeroOGL = 0, + ReplaceOGL = 0x1E01, + IncrOGL = 0x1E02, + DecrOGL = 0x1E03, + InvertOGL = 0x150A, + IncrWrapOGL = 0x8507, + DecrWrapOGL = 0x8508, }; enum class MemoryLayout : u32 { @@ -462,6 +470,16 @@ public: } }; + struct ColorMask { + union { + u32 raw; + BitField<0, 4, u32> R; + BitField<4, 4, u32> G; + BitField<8, 4, u32> B; + BitField<12, 4, u32> A; + }; + }; + bool IsShaderConfigEnabled(std::size_t index) const { // The VertexB is always enabled. if (index == static_cast<std::size_t>(Regs::ShaderProgram::VertexB)) { @@ -475,12 +493,13 @@ public: INSERT_PADDING_WORDS(0x45); struct { - INSERT_PADDING_WORDS(1); + u32 upload_address; u32 data; u32 entry; + u32 bind; } macros; - INSERT_PADDING_WORDS(0x189); + INSERT_PADDING_WORDS(0x188); u32 tfb_enabled; @@ -570,7 +589,11 @@ public: u32 stencil_back_mask; u32 stencil_back_func_mask; - INSERT_PADDING_WORDS(0x13); + INSERT_PADDING_WORDS(0xC); + + u32 color_mask_common; + + INSERT_PADDING_WORDS(0x6); u32 rt_separate_frag_data; @@ -645,8 +668,14 @@ public: ComparisonOp depth_test_func; float alpha_test_ref; ComparisonOp alpha_test_func; - - INSERT_PADDING_WORDS(0x9); + u32 draw_tfb_stride; + struct { + float r; + float g; + float b; + float a; + } blend_color; + INSERT_PADDING_WORDS(0x4); struct { u32 separate_alpha; @@ -840,8 +869,9 @@ public: BitField<6, 4, u32> RT; BitField<10, 11, u32> layer; } clear_buffers; - - INSERT_PADDING_WORDS(0x4B); + INSERT_PADDING_WORDS(0xB); + std::array<ColorMask, NumRenderTargets> color_mask; + INSERT_PADDING_WORDS(0x38); struct { u32 query_address_high; @@ -982,6 +1012,12 @@ public: State state{}; MemoryManager& memory_manager; + struct DirtyFlags { + bool vertex_attrib_format = true; + }; + + DirtyFlags dirty_flags; + /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -994,12 +1030,25 @@ public: /// Returns the texture information for a specific texture in a specific shader stage. Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; + /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than + /// we've seen used. + using MacroMemory = std::array<u32, 0x40000>; + + /// Gets a reference to macro memory. + const MacroMemory& GetMacroMemory() const { + return macro_memory; + } + private: void InitializeRegisterDefaults(); VideoCore::RasterizerInterface& rasterizer; - std::unordered_map<u32, std::vector<u32>> uploaded_macros; + /// Start offsets of each macro in macro_memory + std::unordered_map<u32, u32> macro_offsets; + + /// Memory for macro code + MacroMemory macro_memory; /// Macro method that is currently being executed / being fed parameters. u32 executing_macro = 0; @@ -1022,9 +1071,12 @@ private: */ void CallMacroMethod(u32 method, std::vector<u32> parameters); - /// Handles writes to the macro uploading registers. + /// Handles writes to the macro uploading register. void ProcessMacroUpload(u32 data); + /// Handles writes to the macro bind register. + void ProcessMacroBind(u32 data); + /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(); @@ -1058,6 +1110,7 @@ ASSERT_REG_POSITION(scissor_test, 0x380); ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); +ASSERT_REG_POSITION(color_mask_common, 0x3E4); ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); @@ -1070,6 +1123,10 @@ ASSERT_REG_POSITION(depth_write_enabled, 0x4BA); ASSERT_REG_POSITION(alpha_test_enabled, 0x4BB); ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2); ASSERT_REG_POSITION(depth_test_func, 0x4C3); +ASSERT_REG_POSITION(alpha_test_ref, 0x4C4); +ASSERT_REG_POSITION(alpha_test_func, 0x4C5); +ASSERT_REG_POSITION(draw_tfb_stride, 0x4C6); +ASSERT_REG_POSITION(blend_color, 0x4C7); ASSERT_REG_POSITION(blend, 0x4CF); ASSERT_REG_POSITION(stencil_enable, 0x4E0); ASSERT_REG_POSITION(stencil_front_op_fail, 0x4E1); @@ -1100,6 +1157,7 @@ ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); +ASSERT_REG_POSITION(color_mask, 0x680); ASSERT_REG_POSITION(query, 0x6C0); ASSERT_REG_POSITION(vertex_array[0], 0x700); ASSERT_REG_POSITION(independent_blend, 0x780); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index b84da512f..83a6fd875 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -578,6 +578,10 @@ union Instruction { } fmul32; union { + BitField<52, 1, u64> generates_cc; + } op_32; + + union { BitField<48, 1, u64> is_signed; } shift; @@ -1231,6 +1235,7 @@ union Instruction { BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; BitField<20, 24, s64> smem_imm; + BitField<0, 5, ControlCode> flow_control_code; Attribute attribute; Sampler sampler; @@ -1658,4 +1663,4 @@ private: } }; -} // namespace Tegra::Shader
\ No newline at end of file +} // namespace Tegra::Shader |