diff options
Diffstat (limited to 'src/video_core/renderer_opengl/gl_rasterizer.cpp')
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 456 |
1 files changed, 377 insertions, 79 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 822739088..b3dc6aa19 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -19,6 +19,7 @@ #include "core/hw/gpu.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/utils.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -36,7 +37,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { stage.GetAlphaMultiplier() == 1); } -RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } +RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } RasterizerOpenGL::~RasterizerOpenGL() { } void RasterizerOpenGL::InitObjects() { @@ -75,6 +76,12 @@ void RasterizerOpenGL::InitObjects() { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); + + glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); + SetShader(); // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation @@ -120,12 +127,26 @@ void RasterizerOpenGL::InitObjects() { glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + for (size_t i = 0; i < lighting_lut.size(); ++i) { + lighting_lut[i].Create(); + state.lighting_lut[i].texture_1d = lighting_lut[i].handle; + + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d); + + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } + state.Apply(); + ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); } void RasterizerOpenGL::Reset() { SyncCullMode(); + SyncDepthModifiers(); SyncBlendEnabled(); SyncBlendFuncs(); SyncBlendColor(); @@ -135,15 +156,37 @@ void RasterizerOpenGL::Reset() { SetShader(); - res_cache.FullFlush(); + res_cache.InvalidateAll(); +} + +/** + * This is a helper function to resolve an issue with opposite quaternions being interpolated by + * OpenGL. See below for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, W2) < 0. In that case, you can flip either of them, therefore + * making Dot(-Q1, W2) positive. + * + * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This + * should be correct for nearly all cases, however a more correct implementation (but less trivial + * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions + * manually using two Lerps, and doing this correction before each Lerp. + */ +static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { + Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; + Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; + + return (Math::Dot(a, b) < 0.f); } void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0); - vertex_batch.emplace_back(v1); - vertex_batch.emplace_back(v2); + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); } void RasterizerOpenGL::DrawTriangles() { @@ -155,6 +198,13 @@ void RasterizerOpenGL::DrawTriangles() { state.draw.shader_dirty = false; } + for (unsigned index = 0; index < lighting_lut.size(); index++) { + if (uniform_block_data.lut_dirty[index]) { + SyncLightingLUT(index); + uniform_block_data.lut_dirty[index] = false; + } + } + if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); uniform_block_data.dirty = false; @@ -168,19 +218,17 @@ void RasterizerOpenGL::DrawTriangles() { // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; - res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true); - res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true); + res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); + res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true); } -void RasterizerOpenGL::CommitFramebuffer() { +void RasterizerOpenGL::FlushFramebuffer() { CommitColorBuffer(); CommitDepthBuffer(); } @@ -188,15 +236,18 @@ void RasterizerOpenGL::CommitFramebuffer() { void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { const auto& regs = Pica::g_state.regs; - if (!Settings::values.use_hw_renderer) - return; - switch(id) { // Culling case PICA_REG_INDEX(cull_mode): SyncCullMode(); break; + // Depth modifiers + case PICA_REG_INDEX(viewport_depth_range): + case PICA_REG_INDEX(viewport_depth_far_plane): + SyncDepthModifiers(); + break; + // Blending case PICA_REG_INDEX(output_merger.alphablend_enable): SyncBlendEnabled(); @@ -281,54 +332,203 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(tev_combiner_buffer_color): SyncCombinerColor(); break; + + // Fragment lighting specular 0 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10): + SyncLightSpecular0(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10): + SyncLightSpecular0(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10): + SyncLightSpecular0(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10): + SyncLightSpecular0(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10): + SyncLightSpecular0(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10): + SyncLightSpecular0(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10): + SyncLightSpecular0(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10): + SyncLightSpecular0(7); + break; + + // Fragment lighting specular 1 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10): + SyncLightSpecular1(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10): + SyncLightSpecular1(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10): + SyncLightSpecular1(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10): + SyncLightSpecular1(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10): + SyncLightSpecular1(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10): + SyncLightSpecular1(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10): + SyncLightSpecular1(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10): + SyncLightSpecular1(7); + break; + + // Fragment lighting diffuse color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10): + SyncLightDiffuse(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10): + SyncLightDiffuse(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10): + SyncLightDiffuse(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10): + SyncLightDiffuse(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10): + SyncLightDiffuse(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10): + SyncLightDiffuse(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10): + SyncLightDiffuse(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10): + SyncLightDiffuse(7); + break; + + // Fragment lighting ambient color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10): + SyncLightAmbient(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10): + SyncLightAmbient(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10): + SyncLightAmbient(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10): + SyncLightAmbient(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10): + SyncLightAmbient(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10): + SyncLightAmbient(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10): + SyncLightAmbient(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10): + SyncLightAmbient(7); + break; + + // Fragment lighting position + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): + SyncLightPosition(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10): + SyncLightPosition(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10): + SyncLightPosition(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10): + SyncLightPosition(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10): + SyncLightPosition(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10): + SyncLightPosition(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10): + SyncLightPosition(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10): + SyncLightPosition(7); + break; + + // Fragment lighting global ambient color (emission + ambient * ambient) + case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0): + SyncGlobalAmbient(); + break; + + // Fragment lighting lookup tables + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + uniform_block_data.lut_dirty[lut_config.type / 4] = true; + break; + } + } } -void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { +void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { const auto& regs = Pica::g_state.regs; - if (!Settings::values.use_hw_renderer) - return; + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); - - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; // If source memory region overlaps 3DS framebuffers, commit them before the copy happens - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) CommitColorBuffer(); - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) CommitDepthBuffer(); } -void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { const auto& regs = Pica::g_state.regs; - if (!Settings::values.use_hw_renderer) - return; - - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) ReloadColorBuffer(); - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) ReloadDepthBuffer(); // Notify cache of flush in case the region touches a cached resource - res_cache.NotifyFlush(addr, size); + res_cache.InvalidateInRange(addr, size); } void RasterizerOpenGL::SamplerInfo::Create() { @@ -499,27 +699,48 @@ void RasterizerOpenGL::SetShader() { uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } + // Set the texture samplers to correspond to different lookup table texture units + GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } + current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); glUniformBlockBinding(current_shader->shader.handle, block_index, 0); - } - // Update uniforms - SyncAlphaTest(); - SyncCombinerColor(); - auto& tev_stages = Pica::g_state.regs.GetTevStages(); - for (int index = 0; index < tev_stages.size(); ++index) - SyncTevConstColor(index, tev_stages[index]); + // Update uniforms + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.GetTevStages(); + for (int index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (int light_index = 0; light_index < 8; light_index++) { + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + } + } } void RasterizerOpenGL::SyncFramebuffer() { const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || @@ -531,10 +752,10 @@ void RasterizerOpenGL::SyncFramebuffer() { bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || fb_size_changed; - bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || + bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr || color_fb_prop_changed; - bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || + bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr || depth_fb_prop_changed; // Commit if framebuffer modified in any way @@ -574,13 +795,13 @@ void RasterizerOpenGL::SyncFramebuffer() { // Load buffer data again if fb modified in any way if (color_fb_modified) { - last_fb_color_addr = cur_fb_color_addr; + cached_fb_color_addr = new_fb_color_addr; ReloadColorBuffer(); } if (depth_fb_modified) { - last_fb_depth_addr = cur_fb_depth_addr; + cached_fb_depth_addr = new_fb_depth_addr; ReloadDepthBuffer(); } @@ -596,12 +817,12 @@ void RasterizerOpenGL::SyncCullMode() { case Pica::Regs::CullMode::KeepClockWise: state.cull.enabled = true; - state.cull.mode = GL_BACK; + state.cull.front_face = GL_CW; break; case Pica::Regs::CullMode::KeepCounterClockWise: state.cull.enabled = true; - state.cull.mode = GL_FRONT; + state.cull.front_face = GL_CCW; break; default: @@ -611,6 +832,15 @@ void RasterizerOpenGL::SyncCullMode() { } } +void RasterizerOpenGL::SyncDepthModifiers() { + float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + + // TODO: Implement scale modifier + uniform_block_data.data.depth_offset = depth_offset; + uniform_block_data.dirty = true; +} + void RasterizerOpenGL::SyncBlendEnabled() { state.blend.enabled = (Pica::g_state.regs.output_merger.alphablend_enable == 1); } @@ -657,8 +887,10 @@ void RasterizerOpenGL::SyncStencilTest() { void RasterizerOpenGL::SyncDepthTest() { const auto& regs = Pica::g_state.regs; - state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1); - state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); + state.depth.test_enabled = regs.output_merger.depth_test_enable == 1 || + regs.output_merger.depth_write_enable == 1; + state.depth.test_func = regs.output_merger.depth_test_enable == 1 ? + PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; state.color_mask.red_enabled = regs.output_merger.red_enable; state.color_mask.green_enabled = regs.output_merger.green_enable; state.color_mask.blue_enabled = regs.output_merger.blue_enable; @@ -682,19 +914,87 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS } } +void RasterizerOpenGL::SyncGlobalAmbient() { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { + std::array<GLvec4, 256> new_data; + + for (unsigned offset = 0; offset < new_data.size(); ++offset) { + new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); + new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); + new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); + new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); + } + + if (new_data != lighting_lut_data[lut_index]) { + lighting_lut_data[lut_index] = new_data; + glActiveTexture(GL_TEXTURE3 + lut_index); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); + } +} + +void RasterizerOpenGL::SyncLightSpecular0(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpecular1(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDiffuse(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightAmbient(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightPosition(int light_index) { + GLvec3 position = { + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; + + if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncDrawState() { const auto& regs = Pica::g_state.regs; // Sync the viewport - GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; + GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; // OpenGL uses different y coordinates, so negate corner offset and flip origin // TODO: Ensure viewport_corner.x should not be negated or origin flipped // TODO: Use floating-point viewports for accuracy if supported - glViewport((GLsizei)static_cast<float>(regs.viewport_corner.x), - -(GLsizei)static_cast<float>(regs.viewport_corner.y) - + regs.framebuffer.GetHeight() - viewport_height, + glViewport((GLsizei)regs.viewport_corner.x, + (GLsizei)regs.viewport_corner.y, viewport_width, viewport_height); // Sync bound texture(s), upload if not cached @@ -717,7 +1017,7 @@ void RasterizerOpenGL::SyncDrawState() { MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); void RasterizerOpenGL::ReloadColorBuffer() { - u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); + u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); if (color_buffer == nullptr) return; @@ -733,7 +1033,7 @@ void RasterizerOpenGL::ReloadColorBuffer() { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_color_texture.width) * bytes_per_pixel; + u32 gl_pixel_index = (x + (fb_color_texture.height - 1 - y) * fb_color_texture.width) * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel); @@ -752,13 +1052,11 @@ void RasterizerOpenGL::ReloadColorBuffer() { } void RasterizerOpenGL::ReloadDepthBuffer() { - PAddr depth_buffer_addr = Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress(); - - if (depth_buffer_addr == 0) + if (cached_fb_depth_addr == 0) return; // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil - u8* depth_buffer = Memory::GetPhysicalPointer(depth_buffer_addr); + u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); if (depth_buffer == nullptr) return; @@ -779,7 +1077,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width); + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width); u8* pixel = depth_buffer + dst_offset; u32 depth_stencil = *(u32*)pixel; @@ -791,7 +1089,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp; u8* pixel = depth_buffer + dst_offset; memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel); @@ -821,8 +1119,8 @@ Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); void RasterizerOpenGL::CommitColorBuffer() { - if (last_fb_color_addr != 0) { - u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); + if (cached_fb_color_addr != 0) { + u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); if (color_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); @@ -846,7 +1144,7 @@ void RasterizerOpenGL::CommitColorBuffer() { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_pixel_index = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + u32 gl_pixel_index = x * bytes_per_pixel + (fb_color_texture.height - 1 - y) * fb_color_texture.width * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel); @@ -857,9 +1155,9 @@ void RasterizerOpenGL::CommitColorBuffer() { } void RasterizerOpenGL::CommitDepthBuffer() { - if (last_fb_depth_addr != 0) { + if (cached_fb_depth_addr != 0) { // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. - u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); + u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); if (depth_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); @@ -888,7 +1186,7 @@ void RasterizerOpenGL::CommitDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width); + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width); u8* pixel = depth_buffer + dst_offset; u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index]; @@ -900,7 +1198,7 @@ void RasterizerOpenGL::CommitDepthBuffer() { for (int x = 0; x < fb_depth_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + u32 gl_pixel_index = (x + (fb_depth_texture.height - 1 - y) * fb_depth_texture.width) * gl_bpp; u8* pixel = depth_buffer + dst_offset; memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel); |