diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/command_processor.cpp | 212 | ||||
-rw-r--r-- | src/video_core/regs_framebuffer.h | 7 | ||||
-rw-r--r-- | src/video_core/regs_pipeline.h | 8 | ||||
-rw-r--r-- | src/video_core/regs_rasterizer.h | 8 | ||||
-rw-r--r-- | src/video_core/regs_texturing.h | 19 | ||||
-rw-r--r-- | src/video_core/shader/shader.h | 7 | ||||
-rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.h | 14 | ||||
-rw-r--r-- | src/video_core/swrasterizer/clipper.cpp | 7 | ||||
-rw-r--r-- | src/video_core/swrasterizer/rasterizer.h | 6 |
12 files changed, 204 insertions, 92 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 49a93e980..4633a1df1 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -32,12 +32,13 @@ namespace Pica { namespace CommandProcessor { -static int float_regs_counter = 0; +static int vs_float_regs_counter = 0; +static u32 vs_uniform_write_buffer[4]; -static u32 uniform_write_buffer[4]; +static int gs_float_regs_counter = 0; +static u32 gs_uniform_write_buffer[4]; static int default_attr_counter = 0; - static u32 default_attr_write_buffer[3]; // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF @@ -48,6 +49,97 @@ static const u32 expand_bits_to_bytes[] = { MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240)); +static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) { + if (&setup == &g_state.vs) { + return "vertex shader"; + } + if (&setup == &g_state.gs) { + return "geometry shader"; + } + return "unknown shader"; +} + +static void WriteUniformBoolReg(Shader::ShaderSetup& setup, u32 value) { + for (unsigned i = 0; i < setup.uniforms.b.size(); ++i) + setup.uniforms.b[i] = (value & (1 << i)) != 0; +} + +static void WriteUniformIntReg(Shader::ShaderSetup& setup, unsigned index, + const Math::Vec4<u8>& values) { + ASSERT(index < setup.uniforms.i.size()); + setup.uniforms.i[index] = values; + LOG_TRACE(HW_GPU, "Set %s integer uniform %d to %02x %02x %02x %02x", + GetShaderSetupTypeName(setup), index, values.x, values.y, values.z, values.w); +} + +static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup, + int& float_regs_counter, u32 uniform_write_buffer[4], u32 value) { + auto& uniform_setup = config.uniform_setup; + + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + uniform_write_buffer[float_regs_counter++] = value; + + // Uniforms are written in a packed format such that four float24 values are encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || + (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { + float_regs_counter = 0; + + auto& uniform = setup.uniforms.f[uniform_setup.index]; + + if (uniform_setup.index >= 96) { + LOG_ERROR(HW_GPU, "Invalid %s float uniform index %d", GetShaderSetupTypeName(setup), + (int)uniform_setup.index); + } else { + + // NOTE: The destination component order indeed is "backwards" + if (uniform_setup.IsFloat32()) { + for (auto i : {0, 1, 2, 3}) + uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); + } else { + // TODO: Untested + uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); + uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | + ((uniform_write_buffer[1] >> 16) & 0xFFFF)); + uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | + ((uniform_write_buffer[2] >> 24) & 0xFF)); + uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); + } + + LOG_TRACE(HW_GPU, "Set %s float uniform %x to (%f %f %f %f)", + GetShaderSetupTypeName(setup), (int)uniform_setup.index, + uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), + uniform.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + uniform_setup.index.Assign(uniform_setup.index + 1); + } + } +} + +static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup, + unsigned max_program_code_length, u32 value) { + if (config.program.offset >= max_program_code_length) { + LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup), + (int)config.program.offset); + } else { + setup.program_code[config.program.offset] = value; + config.program.offset++; + } +} + +static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) { + if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) { + LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup), + (int)config.swizzle_patterns.offset); + } else { + setup.swizzle_data[config.swizzle_patterns.offset] = value; + config.swizzle_patterns.offset++; + } +} + static void WritePicaReg(u32 id, u32 value, u32 mask) { auto& regs = g_state.regs; @@ -330,21 +422,70 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } - case PICA_REG_INDEX(vs.bool_uniforms): - for (unsigned i = 0; i < 16; ++i) - g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; + case PICA_REG_INDEX(gs.bool_uniforms): + WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); + break; + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281): + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[1], 0x282): + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[2], 0x283): + case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[3], 0x284): { + unsigned index = (id - PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281)); + auto values = regs.gs.int_uniforms[index]; + WriteUniformIntReg(g_state.gs, index, + Math::Vec4<u8>(values.x, values.y, values.z, values.w)); + break; + } + + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[0], 0x291): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[1], 0x292): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[2], 0x293): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[3], 0x294): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[4], 0x295): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[5], 0x296): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[6], 0x297): + case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[7], 0x298): { + WriteUniformFloatReg(g_state.regs.gs, g_state.gs, gs_float_regs_counter, + gs_uniform_write_buffer, value); + break; + } + + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[0], 0x29c): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[1], 0x29d): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[2], 0x29e): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[3], 0x29f): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[4], 0x2a0): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2): + case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): { + WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value); + break; + } + + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[0], 0x2a6): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[1], 0x2a7): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[2], 0x2a8): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[3], 0x2a9): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[4], 0x2aa): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac): + case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): { + WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value); + break; + } + + case PICA_REG_INDEX(vs.bool_uniforms): + WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value()); break; case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { - int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); + unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); auto values = regs.vs.int_uniforms[index]; - g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); - LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), - values.y.Value(), values.z.Value(), values.w.Value()); + WriteUniformIntReg(g_state.vs, index, + Math::Vec4<u8>(values.x, values.y, values.z, values.w)); break; } @@ -356,51 +497,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { - auto& uniform_setup = regs.vs.uniform_setup; - - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - uniform_write_buffer[float_regs_counter++] = value; - - // Uniforms are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) || - (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { - float_regs_counter = 0; - - auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; - - if (uniform_setup.index > 95) { - LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); - break; - } - - // NOTE: The destination component order indeed is "backwards" - if (uniform_setup.IsFloat32()) { - for (auto i : {0, 1, 2, 3}) - uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); - } else { - // TODO: Untested - uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); - uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | - ((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | - ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); - } - - LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, - uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(), - uniform.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! - uniform_setup.index.Assign(uniform_setup.index + 1); - } + WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter, + vs_uniform_write_buffer, value); break; } - // Load shader program code case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): @@ -409,12 +510,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - g_state.vs.program_code[regs.vs.program.offset] = value; - regs.vs.program.offset++; + WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value); break; } - // Load swizzle pattern data case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): @@ -423,8 +522,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { - g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; - regs.vs.swizzle_patterns.offset++; + WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value); break; } diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h index 9ddc79243..a50bd4111 100644 --- a/src/video_core/regs_framebuffer.h +++ b/src/video_core/regs_framebuffer.h @@ -211,13 +211,14 @@ struct FramebufferRegs { BitField<0, 2, u32> allow_depth_stencil_write; // 0 = disable, else enable }; - DepthFormat depth_format; // TODO: Should be a BitField! + BitField<0, 2, DepthFormat> depth_format; + BitField<16, 3, ColorFormat> color_format; INSERT_PADDING_WORDS(0x4); - u32 depth_buffer_address; - u32 color_buffer_address; + BitField<0, 28, u32> depth_buffer_address; + BitField<0, 28, u32> color_buffer_address; union { // Apparently, the framebuffer width is stored as expected, diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h index 0a4ec6e1e..31c747d77 100644 --- a/src/video_core/regs_pipeline.h +++ b/src/video_core/regs_pipeline.h @@ -22,10 +22,10 @@ struct PipelineRegs { }; struct { - BitField<0, 29, u32> base_address; + BitField<1, 28, u32> base_address; PAddr GetPhysicalBaseAddress() const { - return base_address * 8; + return base_address * 16; } // Descriptor for internal vertex attributes @@ -99,7 +99,7 @@ struct PipelineRegs { // This e.g. allows to load different attributes from different memory locations struct { // Source attribute data offset from the base address - u32 data_offset; + BitField<0, 28, u32> data_offset; union { BitField<0, 4, u32> comp0; @@ -180,6 +180,8 @@ struct PipelineRegs { // kicked off. // 2) Games can configure these registers to provide a command list subroutine mechanism. + // TODO: verify the bit length of these two fields + // According to 3dbrew, the bit length of them are 21 and 29, respectively BitField<0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer BitField<0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index a471a3b38..2874fd127 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -92,13 +92,13 @@ struct RasterizerRegs { BitField<0, 2, ScissorMode> mode; union { - BitField<0, 16, u32> x1; - BitField<16, 16, u32> y1; + BitField<0, 10, u32> x1; + BitField<16, 10, u32> y1; }; union { - BitField<0, 16, u32> x2; - BitField<16, 16, u32> y2; + BitField<0, 10, u32> x2; + BitField<16, 10, u32> y2; }; } scissor_test; diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 20f9495ed..e4038b41b 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -29,6 +29,11 @@ struct TexturingRegs { ClampToBorder = 1, Repeat = 2, MirroredRepeat = 3, + // Mode 4-7 produces some weird result and may be just invalid: + // 4: Positive coord: clamp to edge; negative coord: repeat + // 5: Positive coord: clamp to border; negative coord: repeat + // 6: Repeat + // 7: Repeat }; enum TextureFilter : u32 { @@ -45,22 +50,22 @@ struct TexturingRegs { } border_color; union { - BitField<0, 16, u32> height; - BitField<16, 16, u32> width; + BitField<0, 11, u32> height; + BitField<16, 11, u32> width; }; union { BitField<1, 1, TextureFilter> mag_filter; BitField<2, 1, TextureFilter> min_filter; - BitField<8, 2, WrapMode> wrap_t; - BitField<12, 2, WrapMode> wrap_s; - BitField<28, 2, TextureType> - type; ///< @note Only valid for texture 0 according to 3DBrew. + BitField<8, 3, WrapMode> wrap_t; + BitField<12, 3, WrapMode> wrap_s; + /// @note Only valid for texture 0 according to 3DBrew. + BitField<28, 3, TextureType> type; }; INSERT_PADDING_WORDS(0x1); - u32 address; + BitField<0, 28, u32> address; PAddr GetPhysicalAddress() const { return address * 8; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 38ea717ab..e156f6aef 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -24,6 +24,9 @@ namespace Pica { namespace Shader { +constexpr unsigned MAX_PROGRAM_CODE_LENGTH = 4096; +constexpr unsigned MAX_SWIZZLE_DATA_LENGTH = 4096; + struct AttributeBuffer { alignas(16) Math::Vec4<float24> attr[16]; }; @@ -144,8 +147,8 @@ struct ShaderSetup { return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>); } - std::array<u32, 1024> program_code; - std::array<u32, 1024> swizzle_data; + std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code; + std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data; /// Data private to ShaderEngines struct EngineData { diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index f4d1c46c5..aa1cec81f 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -653,7 +653,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData } void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { - ASSERT(entry_point < 1024); + ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.engine_data.entry_point = entry_point; } diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 0ee0dd9ef..73c21871c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -15,7 +15,7 @@ JitX64Engine::JitX64Engine() = default; JitX64Engine::~JitX64Engine() = default; void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { - ASSERT(entry_point < 1024); + ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH); setup.engine_data.entry_point = entry_point; u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code)); diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 2dbc8b147..5d9b6448c 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -834,8 +834,8 @@ void JitShader::FindReturnOffsets() { std::sort(return_offsets.begin(), return_offsets.end()); } -void JitShader::Compile(const std::array<u32, 1024>* program_code_, - const std::array<u32, 1024>* swizzle_data_) { +void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code_, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data_) { program_code = program_code_; swizzle_data = swizzle_data_; diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h index f27675560..31af0ca48 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.h +++ b/src/video_core/shader/shader_jit_x64_compiler.h @@ -22,8 +22,8 @@ namespace Pica { namespace Shader { -/// Memory allocated for each compiled shader (64Kb) -constexpr size_t MAX_SHADER_SIZE = 1024 * 64; +/// Memory allocated for each compiled shader +constexpr size_t MAX_SHADER_SIZE = MAX_PROGRAM_CODE_LENGTH * 64; /** * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64 @@ -37,8 +37,8 @@ public: program(&setup, &state, instruction_labels[offset].getAddress()); } - void Compile(const std::array<u32, 1024>* program_code, - const std::array<u32, 1024>* swizzle_data); + void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data); void Compile_ADD(Instruction instr); void Compile_DP3(Instruction instr); @@ -104,11 +104,11 @@ private: */ void FindReturnOffsets(); - const std::array<u32, 1024>* program_code = nullptr; - const std::array<u32, 1024>* swizzle_data = nullptr; + const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr; + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr; /// Mapping of Pica VS instructions to pointers in the emitted code - std::array<Xbyak::Label, 1024> instruction_labels; + std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels; /// Offsets in code where a return needs to be inserted std::vector<unsigned> return_offsets; diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index 2d80822d9..6fb923756 100644 --- a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -69,13 +69,14 @@ static void InitScreenCoordinates(Vertex& vtx) { viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.rasterizer.viewport_corner.y)); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; - vtx.color *= inv_w; - vtx.view *= inv_w; + vtx.pos.w = inv_w; vtx.quat *= inv_w; + vtx.color *= inv_w; vtx.tc0 *= inv_w; vtx.tc1 *= inv_w; + vtx.tc0_w *= inv_w; + vtx.view *= inv_w; vtx.tc2 *= inv_w; - vtx.pos.w = inv_w; vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; diff --git a/src/video_core/swrasterizer/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h index 3a72ac343..2f0877581 100644 --- a/src/video_core/swrasterizer/rasterizer.h +++ b/src/video_core/swrasterizer/rasterizer.h @@ -23,13 +23,15 @@ struct Vertex : Shader::OutputVertex { pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor); // TODO: Should perform perspective correct interpolation here... + quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor); + color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor); tc1 = tc1 * factor + vtx.tc1 * (float24::FromFloat32(1) - factor); + tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor); + view = view * factor + vtx.view * (float24::FromFloat32(1) - factor); tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor); screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor); - - color = color * factor + vtx.color * (float24::FromFloat32(1) - factor); } // Linear interpolation |