diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 308 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 16 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1555 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 426 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_resource_manager.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 460 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_manager.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_util.h | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 83 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 43 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 89 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 17 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.h | 2 |
15 files changed, 1214 insertions, 1809 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6f05f24a0..ea138d402 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -112,7 +112,7 @@ RasterizerOpenGL::RasterizerOpenGL() { glEnable(GL_BLEND); - NGLOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } RasterizerOpenGL::~RasterizerOpenGL() { @@ -146,7 +146,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, u64 size = end - start + 1; // Copy vertex array data - res_cache.FlushRegion(start, size, nullptr); Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); // Bind the vertex array to the buffer at the current offset. @@ -166,9 +165,9 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, // assume every shader uses them all. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; - NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", - index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), - attrib.offset.Value(), attrib.IsNormalized()); + LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), + attrib.offset.Value(), attrib.IsNormalized()); auto& buffer = regs.vertex_array[attrib.buffer]; ASSERT(buffer.IsEnabled()); @@ -197,8 +196,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL - // shaders. - u32 current_constbuffer_bindpoint = 0; + // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. + u32 current_constbuffer_bindpoint = uniform_buffers.size(); u32 current_texture_bindpoint = 0; for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { @@ -252,8 +251,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { break; } default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", - index, shader_config.enable.Value(), shader_config.offset); + LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, + shader_config.enable.Value(), shader_config.offset); UNREACHABLE(); } @@ -298,17 +297,16 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { return true; } -void RasterizerOpenGL::DrawArrays() { - if (accelerate_draw == AccelDraw::Disabled) - return; - - MICROPROFILE_SCOPE(OpenGL_Drawing); +std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, + bool using_depth_fb) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - // TODO(bunnei): Implement these + // Sync the depth test state before configuring the framebuffer surfaces. + SyncDepthTestState(); + + // TODO(bunnei): Implement this const bool has_stencil = false; - const bool using_color_fb = true; - const bool using_depth_fb = false; + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; const bool write_color_fb = @@ -325,35 +323,21 @@ void RasterizerOpenGL::DrawArrays() { std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); - const u16 res_scale = color_surface != nullptr - ? color_surface->res_scale - : (depth_surface == nullptr ? 1u : depth_surface->res_scale); - MathUtil::Rectangle<u32> draw_rect{ + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right, + surfaces_rect.left, surfaces_rect.right)), // Right static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Left - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top * res_scale, - surfaces_rect.bottom, surfaces_rect.top)), // Top - static_cast<u32>( - std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right * res_scale, - surfaces_rect.left, surfaces_rect.right)), // Right - static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + - viewport_rect.bottom * res_scale, - surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom // Bind the framebuffer surfaces BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); - // Sync the viewport - SyncViewport(surfaces_rect, res_scale); - - // Sync the blend state registers - SyncBlendState(); - - // TODO(bunnei): Sync framebuffer_scale uniform here - // TODO(bunnei): Sync scissorbox uniform(s) here + SyncViewport(surfaces_rect); // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable // scissor test to prevent drawing outside of the framebuffer region @@ -364,6 +348,66 @@ void RasterizerOpenGL::DrawArrays() { state.scissor.height = draw_rect.GetHeight(); state.Apply(); + // Only return the surface to be marked as dirty if writing to it is enabled. + return std::make_pair(write_color_fb ? color_surface : nullptr, + write_depth_fb ? depth_surface : nullptr); +} + +void RasterizerOpenGL::Clear() { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + bool use_color_fb = false; + bool use_depth_fb = false; + + GLbitfield clear_mask = 0; + if (regs.clear_buffers.R && regs.clear_buffers.G && regs.clear_buffers.B && + regs.clear_buffers.A) { + clear_mask |= GL_COLOR_BUFFER_BIT; + use_color_fb = true; + } + if (regs.clear_buffers.Z) { + clear_mask |= GL_DEPTH_BUFFER_BIT; + use_depth_fb = true; + } + + if (clear_mask == 0) + return; + + auto [dirty_color_surface, dirty_depth_surface] = + ConfigureFramebuffers(use_color_fb, use_depth_fb); + + // TODO(Subv): Support clearing only partial colors. + glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], + regs.clear_color[3]); + glClearDepth(regs.clear_depth); + + glClear(clear_mask); + + // Mark framebuffer surfaces as dirty + if (dirty_color_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_color_surface); + } + if (dirty_depth_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_depth_surface); + } +} + +void RasterizerOpenGL::DrawArrays() { + if (accelerate_draw == AccelDraw::Disabled) + return; + + MICROPROFILE_SCOPE(OpenGL_Drawing); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + auto [dirty_color_surface, dirty_depth_surface] = + ConfigureFramebuffers(true, regs.zeta.Address() != 0); + + SyncBlendState(); + SyncCullMode(); + + // TODO(bunnei): Sync framebuffer_scale uniform here + // TODO(bunnei): Sync scissorbox uniform(s) here + // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; @@ -420,14 +464,16 @@ void RasterizerOpenGL::DrawArrays() { const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; if (is_indexed) { - const GLint index_min{static_cast<GLint>(regs.index_array.first)}; - const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)}; - glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count, - MaxwellToGL::IndexFormat(regs.index_array.format), - reinterpret_cast<const void*>(index_buffer_offset), - -index_min); + const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)}; + + // Adjust the index buffer offset so it points to the first desired index. + index_buffer_offset += regs.index_array.first * regs.index_array.FormatSizeInBytes(); + + glDrawElementsBaseVertex(primitive_mode, regs.index_array.count, + MaxwellToGL::IndexFormat(regs.index_array.format), + reinterpret_cast<const void*>(index_buffer_offset), base_vertex); } else { - glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count); + glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count); } // Disable scissor test @@ -437,24 +483,16 @@ void RasterizerOpenGL::DrawArrays() { // Unbind textures for potential future use as framebuffer attachments for (auto& texture_unit : state.texture_units) { - texture_unit.texture_2d = 0; + texture_unit.Unbind(); } state.Apply(); // Mark framebuffer surfaces as dirty - MathUtil::Rectangle<u32> draw_rect_unscaled{ - draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, - draw_rect.bottom / res_scale}; - - if (color_surface != nullptr && write_color_fb) { - auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - color_surface); + if (dirty_color_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_color_surface); } - if (depth_surface != nullptr && write_depth_fb) { - auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); - res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), - depth_surface); + if (dirty_depth_surface != nullptr) { + res_cache.MarkSurfaceAsDirty(dirty_depth_surface); } } @@ -462,7 +500,7 @@ void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {} void RasterizerOpenGL::FlushAll() { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.FlushAll(); + res_cache.FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); } void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { @@ -472,13 +510,13 @@ void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) { void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - res_cache.InvalidateRegion(addr, size, nullptr); + res_cache.InvalidateRegion(addr, size); } void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); res_cache.FlushRegion(addr, size); - res_cache.InvalidateRegion(addr, size, nullptr); + res_cache.InvalidateRegion(addr, size); } bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { @@ -497,45 +535,28 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { return true; } -bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, +bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride, ScreenInfo& screen_info) { - if (framebuffer_addr == 0) { - return false; + if (!framebuffer_addr) { + return {}; } + MICROPROFILE_SCOPE(OpenGL_CacheManagement); - SurfaceParams src_params; - src_params.cpu_addr = framebuffer_addr; - src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0); - src_params.width = std::min(framebuffer.width, pixel_stride); - src_params.height = framebuffer.height; - src_params.stride = pixel_stride; - src_params.is_tiled = true; - src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; - src_params.pixel_format = - SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); - src_params.component_type = - SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format); - src_params.UpdateParams(); - - MathUtil::Rectangle<u32> src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); - - if (src_surface == nullptr) { - return false; + const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; + if (!surface) { + return {}; } - u32 scaled_width = src_surface->GetScaledWidth(); - u32 scaled_height = src_surface->GetScaledHeight(); + // Verify that the cached surface is the same size and format as the requested framebuffer + const auto& params{surface->GetSurfaceParams()}; + const auto& pixel_format{SurfaceParams::PixelFormatFromGPUPixelFormat(config.pixel_format)}; + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); - screen_info.display_texcoords = MathUtil::Rectangle<float>( - (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, - (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); - - screen_info.display_texture = src_surface->texture.handle; + screen_info.display_texture = surface->Texture().handle; return true; } @@ -608,32 +629,44 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); - std::vector<u8> data; + size_t size = 0; + if (used_buffer.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing - data.resize(buffer.size * sizeof(float)); + size = buffer.size * sizeof(float); + + if (size > MaxConstbufferSize) { + LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); + size = MaxConstbufferSize; + } } else { // Buffer is accessed directly, upload just what we use - data.resize(used_buffer.GetSize() * sizeof(float)); + size = used_buffer.GetSize() * sizeof(float); } + // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 + // UBO alignment requirements. + size = Common::AlignUp(size, sizeof(GLvec4)); + ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); + + std::vector<u8> data(size); Memory::ReadBlock(*addr, data.data(), data.size()); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); - glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); + glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); // Now configure the bindpoint of the buffer inside the shader std::string buffer_name = used_buffer.GetName(); - GLuint index = - glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str()); + GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); if (index != -1) - glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint); + glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); } state.Apply(); - return current_bindpoint + entries.size(); + return current_bindpoint + static_cast<u32>(entries.size()); } u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, @@ -653,16 +686,23 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, // Bind the uniform to the sampler. GLint uniform = glGetUniformLocation(program, entry.GetName().c_str()); - ASSERT(uniform != -1); + if (uniform == -1) { + continue; + } + glProgramUniform1i(program, uniform, current_bindpoint); const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); - ASSERT(texture.enabled); + + if (!texture.enabled) { + state.texture_units[current_bindpoint].texture_2d = 0; + continue; + } texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); Surface surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { - state.texture_units[current_bindpoint].texture_2d = surface->texture.handle; + state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle; state.texture_units[current_bindpoint].swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); state.texture_units[current_bindpoint].swizzle.g = @@ -679,7 +719,7 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, state.Apply(); - return current_unit + entries.size(); + return current_unit + static_cast<u32>(entries.size()); } void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, @@ -688,16 +728,16 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, state.Apply(); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - color_surface != nullptr ? color_surface->texture.handle : 0, 0); + color_surface != nullptr ? color_surface->Texture().handle : 0, 0); if (depth_surface != nullptr) { if (has_stencil) { // attach both depth and stencil glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); } else { // attach depth glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - depth_surface->texture.handle, 0); + depth_surface->Texture().handle, 0); // clear stencil attachment glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); } @@ -708,14 +748,14 @@ void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, } } -void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; - state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; - state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); - state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); + state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left; + state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom; + state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth()); + state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight()); } void RasterizerOpenGL::SyncClipEnabled() { @@ -727,7 +767,27 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - UNREACHABLE(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + state.cull.enabled = regs.cull.enabled != 0; + + if (state.cull.enabled) { + state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); + state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); + + const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 || + regs.viewport_transform[0].scale_y < 0.0f}; + + // If the GPU is configured to flip the rasterized triangles, then we need to flip the + // notion of front and back. Note: We flip the triangles when the value of the register is 0 + // because OpenGL already does it for us. + if (flip_triangles) { + if (state.cull.front_face == GL_CCW) + state.cull.front_face = GL_CW; + else if (state.cull.front_face == GL_CW) + state.cull.front_face = GL_CCW; + } + } } void RasterizerOpenGL::SyncDepthScale() { @@ -738,9 +798,20 @@ void RasterizerOpenGL::SyncDepthOffset() { UNREACHABLE(); } +void RasterizerOpenGL::SyncDepthTestState() { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + state.depth.test_enabled = regs.depth_test_enable != 0; + state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; + + if (!state.depth.test_enabled) + return; + + state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); +} + void RasterizerOpenGL::SyncBlendState() { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented"); // TODO(Subv): Support more than just render target 0. state.blend.enabled = regs.blend.enable[0] != 0; @@ -748,6 +819,7 @@ void RasterizerOpenGL::SyncBlendState() { if (!state.blend.enabled) return; + ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented"); ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented"); state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb); state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b7c8cf843..c406142e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <utility> #include <vector> #include <glad/glad.h> #include "common/common_types.h" @@ -28,6 +29,7 @@ public: ~RasterizerOpenGL() override; void DrawArrays() override; + void Clear() override; void NotifyMaxwellRegisterChanged(u32 method) override; void FlushAll() override; void FlushRegion(Tegra::GPUVAddr addr, u64 size) override; @@ -54,6 +56,11 @@ public: OGLShader shader; }; + /// Maximum supported size that a constbuffer can have in bytes. + static constexpr size_t MaxConstbufferSize = 0x10000; + static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, + "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); + private: class SamplerInfo { public: @@ -76,6 +83,10 @@ private: u32 border_color_a; }; + /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth> + /// surfaces if writing was enabled. + std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb); + /// Binds the framebuffer color and depth surface void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, bool has_stencil); @@ -104,7 +115,7 @@ private: u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries); /// Syncs the viewport to match the guest state - void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); + void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect); /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); @@ -121,6 +132,9 @@ private: /// Syncs the depth offset to match the guest state void SyncDepthOffset(); + /// Syncs the depth test state to match the guest state + void SyncDepthTestState(); + /// Syncs the blend state to match the guest state void SyncBlendState(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index ff48a2669..323ff7408 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -1,36 +1,23 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include <algorithm> -#include <atomic> -#include <cstring> -#include <iterator> -#include <memory> -#include <utility> -#include <vector> -#include <boost/optional.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> + #include "common/alignment.h" -#include "common/bit_field.h" -#include "common/color.h" -#include "common/logging/log.h" -#include "common/math_util.h" +#include "common/assert.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" #include "video_core/utils.h" -#include "video_core/video_core.h" using SurfaceType = SurfaceParams::SurfaceType; using PixelFormat = SurfaceParams::PixelFormat; @@ -40,89 +27,178 @@ struct FormatTuple { GLint internal_format; GLenum format; GLenum type; + ComponentType component_type; bool compressed; }; +/*static*/ SurfaceParams SurfaceParams::CreateForTexture( + const Tegra::Texture::FullTextureInfo& config) { + + SurfaceParams params{}; + params.addr = config.tic.Address(); + params.is_tiled = config.tic.IsTiled(); + params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, + params.pixel_format = PixelFormatFromTextureFormat(config.tic.format); + params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); + params.type = GetFormatType(params.pixel_format); + params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); + params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); + params.unaligned_height = config.tic.Height(); + params.size_in_bytes = params.SizeInBytes(); + return params; +} + +/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) { + + SurfaceParams params{}; + params.addr = config.Address(); + params.is_tiled = true; + params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); + params.component_type = ComponentTypeFromRenderTarget(config.format); + params.type = GetFormatType(params.pixel_format); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + params.size_in_bytes = params.SizeInBytes(); + return params; +} + +/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, Tegra::GPUVAddr zeta_address, + Tegra::DepthFormat format) { + + SurfaceParams params{}; + params.addr = zeta_address; + params.is_tiled = true; + params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.pixel_format = PixelFormatFromDepthFormat(format); + params.component_type = ComponentTypeFromDepthFormat(format); + params.type = GetFormatType(params.pixel_format); + params.size_in_bytes = params.SizeInBytes(); + params.width = config.width; + params.height = config.height; + params.unaligned_height = config.height; + params.size_in_bytes = params.SizeInBytes(); + return params; +} + static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8 - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false}, // B5G6R5 - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8 - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F - {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, + false}, // A2B10G10R10 + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, + false}, // R11FG11FB10F + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI + {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // BC7U + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 + + // DepthStencil formats + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, + false}, // Z24S8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, + false}, // S8Z24 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { - const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); - if (type == SurfaceType::ColorTexture) { - ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); - // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are - // type FLOAT - ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F || - pixel_format == PixelFormat::R11FG11FB10F); - return tex_format_tuples[static_cast<unsigned int>(pixel_format)]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - // TODO(Subv): Implement depth formats - ASSERT_MSG(false, "Unimplemented"); - } + ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size()); + auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; + ASSERT(component_type == format.component_type); - UNREACHABLE(); - return {}; + return format; } -template <typename Map, typename Interval> -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); +VAddr SurfaceParams::GetCpuAddr() const { + const auto& gpu = Core::System::GetInstance().GPU(); + return *gpu.memory_manager->GpuToCpuAddress(addr); } -static u16 GetResolutionScaleFactor() { - return static_cast<u16>(!Settings::values.resolution_factor - ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() - : Settings::values.resolution_factor); +static bool IsPixelFormatASTC(PixelFormat format) { + switch (format) { + case PixelFormat::ASTC_2D_4X4: + return true; + default: + return false; + } +} + +static std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { + switch (format) { + case PixelFormat::ASTC_2D_4X4: + return {4, 4}; + default: + LOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format)); + UNREACHABLE(); + } +} + +MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { + u32 actual_height{unaligned_height}; + if (IsPixelFormatASTC(pixel_format)) { + // ASTC formats must stop at the ATSC block size boundary + actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); + } + return {0, actual_height, width, 0}; } template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base, - Tegra::GPUVAddr start, Tegra::GPUVAddr end) { +void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { - auto data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(base), - SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); + if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { + auto data = Tegra::Texture::UnswizzleTexture( + *gpu.memory_manager->GpuToCpuAddress(addr), + SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); + std::memcpy(gl_buffer, data.data(), data.size()); + } else { + auto data = Tegra::Texture::UnswizzleDepthTexture( + *gpu.memory_manager->GpuToCpuAddress(addr), + SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); + std::memcpy(gl_buffer, data.data(), data.size()); + } } else { - // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check - // the configuration for this and perform more generic un/swizzle - NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should + // check the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, morton_to_gl); } } -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, - Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>, - MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, - MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, + MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, + MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, + MortonCopy<true, PixelFormat::Z32F>, }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, - Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { MortonCopy<false, PixelFormat::ABGR8>, @@ -132,11 +208,17 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<false, PixelFormat::R8>, MortonCopy<false, PixelFormat::RGBA16F>, MortonCopy<false, PixelFormat::R11FG11FB10F>, - // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported + MortonCopy<false, PixelFormat::RGBA32UI>, + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1/BC7U formats is not yet supported + nullptr, nullptr, nullptr, nullptr, nullptr, + MortonCopy<false, PixelFormat::ABGR8>, + MortonCopy<false, PixelFormat::Z24S8>, + MortonCopy<false, PixelFormat::S8Z24>, + MortonCopy<false, PixelFormat::Z32F>, }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -166,374 +248,144 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup cur_state.Apply(); } -static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, - const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, - GLuint read_fb_handle, GLuint draw_fb_handle) { - - glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex, - GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(), - src_rect.GetHeight(), 0); - return true; -} - -static bool FillSurface(const Surface& surface, const u8* fill_data, - const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { - UNREACHABLE(); - return {}; -} - -SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { - SurfaceParams params = *this; - const u32 tiled_size = is_tiled ? 8 : 1; - const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); - Tegra::GPUVAddr aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); - Tegra::GPUVAddr aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); - - if (aligned_end - aligned_start > stride_tiled_bytes) { - params.addr = aligned_start; - params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); - } else { - // 1 row - ASSERT(aligned_end - aligned_start == stride_tiled_bytes); - const u64 tiled_alignment = BytesInPixels(is_tiled ? 8 * 8 : 1); - aligned_start = - addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); - aligned_end = - addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); - params.addr = aligned_start; - params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); - params.stride = params.width; - params.height = tiled_size; - } - params.UpdateParams(); - - return params; -} - -SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { - if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { - return {}; - } - - if (is_tiled) { - unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; - unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; - unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; - unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; - } - - const u32 stride_tiled = !is_tiled ? stride : stride * 8; - - const u32 pixel_offset = - stride_tiled * (!is_tiled ? unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + - unscaled_rect.left; - - const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); - - return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; -} - -MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { - const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); - - if (is_tiled) { - const int x0 = (begin_pixel_index % (stride * 8)) / 8; - const int y0 = (begin_pixel_index / (stride * 8)) * 8; - // Top to bottom - return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, - height - (y0 + sub_surface.height)); - } - - const int x0 = begin_pixel_index % stride; - const int y0 = begin_pixel_index / stride; - // Bottom to top - return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); -} - -MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { - auto rect = GetSubRect(sub_surface); - rect.left = rect.left * res_scale; - rect.right = rect.right * res_scale; - rect.top = rect.top * res_scale; - rect.bottom = rect.bottom * res_scale; - return rect; -} - -bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { - return std::tie(other_surface.addr, other_surface.width, other_surface.height, - other_surface.stride, other_surface.block_height, other_surface.pixel_format, - other_surface.component_type, - other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height, - pixel_format, component_type, is_tiled) && - pixel_format != PixelFormat::Invalid; -} - -bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { - return sub_surface.addr >= addr && sub_surface.end <= end && - sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && - sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height && - sub_surface.component_type == component_type && - (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && - GetSubRect(sub_surface).left + sub_surface.width <= stride; -} - -bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { - return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && - addr <= expanded_surface.end && expanded_surface.addr <= end && - is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height && - component_type == expanded_surface.component_type && stride == expanded_surface.stride && - (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % - BytesInPixels(stride * (is_tiled ? 8 : 1)) == - 0; -} - -bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { - if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || - end < texcopy_params.end) { - return false; - } - if (texcopy_params.block_height != block_height || - texcopy_params.component_type != component_type) - return false; - - if (texcopy_params.width != texcopy_params.stride) { - const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); - return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && - texcopy_params.width % BytesInPixels(is_tiled ? 64 : 1) == 0 && - (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) && - ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride; - } - return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval(); -} - -VAddr SurfaceParams::GetCpuAddr() const { - // When this function is used, only cpu_addr or (GPU) addr should be set, not both - ASSERT(!(cpu_addr && addr)); - const auto& gpu = Core::System::GetInstance().GPU(); - return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr)); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u); - std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; +CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) { + texture.Create(); + const auto& rect{params.GetRect()}; + AllocateSurfaceTexture(texture.handle, + GetFormatTuple(params.pixel_format, params.component_type), + rect.GetWidth(), rect.GetHeight()); +} + +static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { + union S8Z24 { + BitField<0, 24, u32> z24; + BitField<24, 8, u32> s8; + }; + static_assert(sizeof(S8Z24) == 4, "S8Z24 is incorrect size"); + + union Z24S8 { + BitField<0, 8, u32> s8; + BitField<8, 24, u32> z24; + }; + static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); + + S8Z24 input_pixel{}; + Z24S8 output_pixel{}; + for (size_t y = 0; y < height; ++y) { + for (size_t x = 0; x < width; ++x) { + const size_t offset{y * width + x}; + std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); + output_pixel.s8.Assign(input_pixel.s8); + output_pixel.z24.Assign(input_pixel.z24); + std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8)); } - return true; } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if (CanFill(dest_surface, copy_interval)) - return true; - - return false; } - -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } +/** + * Helper function to perform software conversion (as needed) when loading a buffer from Switch + * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with + * typical desktop GPUs. + */ +static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, + u32 width, u32 height) { + switch (pixel_format) { + case PixelFormat::ASTC_2D_4X4: { + // Convert ASTC pixel formats to RGBA8, as most desktop GPUs do not support ASTC. + u32 block_width{}; + u32 block_height{}; + std::tie(block_width, block_height) = GetASTCBlockSize(pixel_format); + data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height); + break; + } + case PixelFormat::S8Z24: + // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24. + ConvertS8Z24ToZ24S8(data, width, height); + break; + } +} + +/** + * Helper function to perform software conversion (as needed) when flushing a buffer to Switch + * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with + * typical desktop GPUs. + */ +static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format, + u32 /*width*/, u32 /*height*/) { + switch (pixel_format) { + case PixelFormat::ASTC_2D_4X4: + case PixelFormat::S8Z24: + LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}", + static_cast<u32>(pixel_format)); + UNREACHABLE(); + break; } - return result; -} - -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - - ASSERT(src_surface != dst_surface); - - // This is only called when CanCopy is true, no need to run checks here - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u64 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array<u8, 4> fill_buffer; - - u64 fill_buff_pos = fill_offset; - for (int i : {0, 1, 2, 3}) - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - - FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params), - draw_framebuffer.handle); - return; - } - if (src_surface->CanSubRect(subrect_params)) { - BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params), - dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params), - src_surface->type, read_framebuffer.handle, draw_framebuffer.handle); - return; - } - UNREACHABLE(); } MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); -void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) { - ASSERT(type != SurfaceType::Fill); +void CachedSurface::LoadGLBuffer() { + ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(GetCpuAddr()); - if (texture_src_data == nullptr) - return; + u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); - if (gl_buffer == nullptr) { - gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format); - gl_buffer.reset(new u8[gl_buffer_size]); - } + ASSERT(texture_src_data); - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); + gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); - ASSERT(load_start >= addr && load_end <= end); - const u64 start_offset = load_start - addr; + MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - if (!is_tiled) { - const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + if (!params.is_tiled) { + const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; - std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset, - bytes_per_pixel * width * height); + std::memcpy(gl_buffer.data(), texture_src_data, + bytes_per_pixel * params.width * params.height); } else { - morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height, - GetActualHeight(), &gl_buffer[0], addr, - load_start, load_end); + morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, gl_buffer.data(), params.addr); } + + ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); } MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) { - u8* const dst_buffer = Memory::GetPointer(GetCpuAddr()); - if (dst_buffer == nullptr) - return; - - ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format)); +void CachedSurface::FlushGLBuffer() { + u8* const dst_buffer = Memory::GetPointer(params.GetCpuAddr()); - // TODO: Should probably be done in ::Memory:: and check for other regions too - // same as loadglbuffer() - if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END) - flush_end = Memory::VRAM_VADDR_END; - - if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) - flush_start = Memory::VRAM_VADDR; + ASSERT(dst_buffer); + ASSERT(gl_buffer.size() == + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); - ASSERT(flush_start >= addr && flush_end <= end); - const u64 start_offset = flush_start - addr; - const u64 end_offset = flush_end - addr; - - if (type == SurfaceType::Fill) { - const u64 coarse_start_offset = start_offset - (start_offset % fill_size); - const u64 backup_bytes = start_offset % fill_size; - std::array<u8, 4> backup_data; - if (backup_bytes) - std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); - - for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { - std::memcpy(&dst_buffer[offset], &fill_data[0], - std::min(fill_size, end_offset - offset)); - } + ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width, + params.height); - if (backup_bytes) - std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!is_tiled) { - std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start); + if (!params.is_tiled) { + std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { - gl_to_morton_fns[static_cast<size_t>(pixel_format)]( - stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end); + gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( + params.width, params.block_height, params.height, gl_buffer.data(), params.addr); } } MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); -void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureUL); - ASSERT(gl_buffer_size == - GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format)); + ASSERT(gl_buffer.size() == + params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + + const auto& rect{params.GetRect()}; // Load data from memory to the surface GLint x0 = static_cast<GLint>(rect.left); GLint y0 = static_cast<GLint>(rect.bottom); - size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format); + size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); GLuint target_tex = texture.handle; - - // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in - // surface - OGLTexture unscaled_tex; - if (res_scale != 1) { - x0 = 0; - y0 = 0; - - unscaled_tex.Create(); - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - target_tex = unscaled_tex.handle; - } - OpenGLState cur_state = OpenGLState::GetCurState(); GLuint old_tex = cur_state.texture_units[0].texture_2d; @@ -541,15 +393,15 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint cur_state.Apply(); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride)); + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.width)); glActiveTexture(GL_TEXTURE0); if (tuple.compressed) { - glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format, - static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()), - static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0, - size, &gl_buffer[buffer_offset]); + glCompressedTexImage2D( + GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width), + static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes), + &gl_buffer[buffer_offset]); } else { glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, @@ -560,845 +412,250 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); - - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle, - scaled_rect, type, read_fb_handle, draw_fb_handle); - } } MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle) { - if (type == SurfaceType::Fill) +void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) { + if (params.type == SurfaceType::Fill) return; MICROPROFILE_SCOPE(OpenGL_TextureDL); - if (gl_buffer == nullptr) { - gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format); - gl_buffer.reset(new u8[gl_buffer_size]); - } + gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type); + const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); // Ensure no bad interactions with GL_PACK_ALIGNMENT - ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride)); - size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format); - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - OGLTexture unscaled_tex; - unscaled_tex.Create(); - - MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; - AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight()); - BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type, - read_fb_handle, draw_fb_handle); - - state.texture_units[0].texture_2d = unscaled_tex.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } else { - state.ResetTexture(texture.handle); - state.draw.read_framebuffer = read_fb_handle; - state.Apply(); - - if (type == SurfaceType::ColorTexture) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - } else if (type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - } else { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, 0); - } - glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), - static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), - tuple.format, tuple.type, &gl_buffer[buffer_offset]); - } + ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} - -enum class MatchFlags { - None = 0, - Invalid = 1, // Flag that can be applied to other match types, invalid matches require - // validation before they can be used - Exact = 1 << 1, // Surfaces perfectly match - SubRect = 1 << 2, // Surface encompasses params - Copy = 1 << 3, // Surface we can copy from - Expand = 1 << 4, // Surface that can expand params - TexCopy = 1 << 5 // Surface that will match a display transfer "texture copy" parameters -}; - -constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) { - return static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs)); -} + const auto& rect{params.GetRect()}; + size_t buffer_offset = + (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format); -constexpr MatchFlags operator&(MatchFlags lhs, MatchFlags rhs) { - return static_cast<MatchFlags>(static_cast<int>(lhs) & static_cast<int>(rhs)); -} + state.UnbindTexture(texture.handle); + state.draw.read_framebuffer = read_fb_handle; + state.Apply(); -/// Get the best surface match (and its match type) for the given flags -template <MatchFlags find_flags> -Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - boost::optional<SurfaceInterval> validate_interval = boost::none) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (auto& surface : pair.second) { - bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - (find_flags & MatchFlags::Copy) != MatchFlags::None - ? true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if ((find_flags & MatchFlags::Invalid) == MatchFlags::None && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if ((find_flags & check_type) == MatchFlags::None) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] { - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - } + if (params.type == SurfaceType::ColorTexture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } else if (params.type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } else { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, 0); } - return match_surface; + glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom), + static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()), + tuple.format, tuple.type, &gl_buffer[buffer_offset]); + + glPixelStorei(GL_PACK_ROW_LENGTH, 0); } RasterizerCacheOpenGL::RasterizerCacheOpenGL() { read_framebuffer.Create(); draw_framebuffer.Create(); - - attributeless_vao.Create(); - - d24s8_abgr_buffer.Create(); - d24s8_abgr_buffer_size = 0; - - const char* vs_source = R"( -#version 330 core -const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); -void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); -} -)"; - const char* fs_source = R"( -#version 330 core - -uniform samplerBuffer tbo; -uniform vec2 tbo_size; -uniform vec4 viewport; - -out vec4 color; - -void main() { - vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw; - int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x); - color = texelFetch(tbo, tbo_offset).rabg; -} -)"; - d24s8_abgr_shader.CreateFromSource(vs_source, nullptr, fs_source); - - OpenGLState state = OpenGLState::GetCurState(); - GLuint old_program = state.draw.shader_program; - state.draw.shader_program = d24s8_abgr_shader.handle; - state.Apply(); - - GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo"); - ASSERT(tbo_u_id != -1); - glUniform1i(tbo_u_id, 0); - - state.draw.shader_program = old_program; - state.Apply(); - - d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size"); - ASSERT(d24s8_abgr_tbo_size_u_id != -1); - d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport"); - ASSERT(d24s8_abgr_viewport_u_id != -1); } RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); -} - -bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface, - const MathUtil::Rectangle<u32>& src_rect, - const Surface& dst_surface, - const MathUtil::Rectangle<u32>& dst_rect) { - if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) - return false; - - return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle, - dst_rect, src_surface->type, read_framebuffer.handle, - draw_framebuffer.handle); -} - -void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex, - const MathUtil::Rectangle<u32>& src_rect, - GLuint dst_tex, - const MathUtil::Rectangle<u32>& dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_framebuffer.handle; - state.draw.draw_framebuffer = draw_framebuffer.handle; - state.Apply(); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle); - - GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4; - if (target_pbo_size > d24s8_abgr_buffer_size) { - d24s8_abgr_buffer_size = target_pbo_size * 2; - glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY); - } - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex, - 0); - glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom), - static_cast<GLsizei>(src_rect.GetWidth()), - static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - 0); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - // PBO now contains src_tex in RABG format - state.draw.shader_program = d24s8_abgr_shader.handle; - state.draw.vertex_array = attributeless_vao.handle; - state.viewport.x = static_cast<GLint>(dst_rect.left); - state.viewport.y = static_cast<GLint>(dst_rect.bottom); - state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth()); - state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight()); - state.Apply(); - - OGLTexture tbo; - tbo.Create(); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_BUFFER, tbo.handle); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle); - - glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()), - static_cast<GLfloat>(src_rect.GetHeight())); - glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x), - static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width), - static_cast<GLfloat>(state.viewport.height)); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - glBindTexture(GL_TEXTURE_BUFFER, 0); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - ASSERT(!params.is_tiled || - (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0)); - - // Check for an exact match in existing surfaces - Surface surface = - FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find it - // to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress( - VAddr cpu_addr) const { - // Tries to find the GPU address of a framebuffer based on the CPU address. This is because - // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU - // addresses. We iterate through all cached framebuffers, and compare their starting CPU address - // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps - // surfaces. - - std::vector<Tegra::GPUVAddr> gpu_addresses; - for (const auto& pair : surface_cache) { - for (const auto& surface : pair.second) { - const VAddr surface_cpu_addr = surface->GetCpuAddr(); - if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) { - ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); - gpu_addresses.push_back(surface->addr); - } - } + while (!surface_cache.empty()) { + UnregisterSurface(surface_cache.begin()->second); } - - if (gpu_addresses.empty()) { - return {}; - } - - ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported"); - return gpu_addresses[0]; -} - -SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - ASSERT(surface->res_scale < params.res_scale); - SurfaceParams new_params = *surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = static_cast<u32>( - new_params.size / aligned_params.BytesInPixels(aligned_params.stride)); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return std::make_tuple(surface, surface->GetScaledSubRect(params)); } Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) { - auto& gpu = Core::System::GetInstance().GPU(); - - SurfaceParams params; - params.addr = config.tic.Address(); - params.is_tiled = config.tic.IsTiled(); - params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format); - - params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); - params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) / - params.GetCompresssionFactor(); - - // TODO(Subv): Different types per component are not supported. - ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() && - config.tic.r_type.Value() == config.tic.b_type.Value() && - config.tic.r_type.Value() == config.tic.a_type.Value()); - - params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value()); - - if (config.tic.IsTiled()) { - params.block_height = config.tic.BlockHeight(); - params.width = Common::AlignUp(params.width, params.block_height); - params.height = Common::AlignUp(params.height, params.block_height); - } else { - // Use the texture-provided stride value if the texture isn't tiled. - params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch())); - } - - params.UpdateParams(); - - if (params.GetActualWidth() % 8 != 0 || params.GetActualHeight() % 8 != 0 || - params.stride != params.width) { - Surface src_surface; - MathUtil::Rectangle<u32> rect; - std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true); - - rect = rect.Scale(params.GetCompresssionFactor()); - - params.res_scale = src_surface->res_scale; - Surface tmp_surface = CreateSurface(params); - - auto dst_rect = tmp_surface->GetScaledRect().Scale(params.GetCompresssionFactor()); - BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle, dst_rect, - SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle, - draw_framebuffer.handle); - - remove_surfaces.emplace(tmp_surface); - return tmp_surface; - } - - return GetSurface(params, ScaleMatch::Ignore, true); + return GetSurface(SurfaceParams::CreateForTexture(config)); } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; - const auto& config = regs.rt[0]; // TODO(bunnei): This is hard corded to use just the first render buffer - NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); - - // update resolution_scale_factor and reset cache if changed - // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We - // need to fix this before making the renderer multi-threaded. - static u16 resolution_scale_factor = GetResolutionScaleFactor(); - if (resolution_scale_factor != GetResolutionScaleFactor()) { - resolution_scale_factor = GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - - MathUtil::Rectangle<u32> viewport_clamped{ - static_cast<u32>(std::clamp(viewport.left, 0, static_cast<s32>(config.width))), - static_cast<u32>(std::clamp(viewport.top, 0, static_cast<s32>(config.height))), - static_cast<u32>(std::clamp(viewport.right, 0, static_cast<s32>(config.width))), - static_cast<u32>(std::clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; + LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.width; - color_params.height = config.height; - // TODO(Subv): Can framebuffers use a different block height? - color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; - SurfaceParams depth_params = color_params; - - color_params.addr = config.Address(); - color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); - color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format); - color_params.UpdateParams(); - - ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); - // depth_params.addr = config.GetDepthBufferPhysicalAddress(); - // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); - // depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; + SurfaceParams color_params{}; + SurfaceParams depth_params{}; + + if (using_color_fb) { + color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]); + } + + if (using_depth_fb) { + depth_params = + SurfaceParams::CreateForDepthBuffer(regs.rt[0], regs.zeta.Address(), regs.zeta.format); } MathUtil::Rectangle<u32> color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + Surface color_surface; + if (using_color_fb) { + color_surface = GetSurface(color_params); + if (color_surface) { + color_rect = color_surface->GetSurfaceParams().GetRect(); + } + } MathUtil::Rectangle<u32> depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + Surface depth_surface; + if (using_depth_fb) { + depth_surface = GetSurface(depth_params); + if (depth_surface) { + depth_rect = depth_surface->GetSurfaceParams().GetRect(); + } + } MathUtil::Rectangle<u32> fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { + if (color_surface && depth_surface) { fb_rect = color_rect; // Color and Depth surfaces must have the same dimensions and offsets if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); + color_surface = GetSurface(color_params); + depth_surface = GetSurface(depth_params); + fb_rect = color_surface->GetSurfaceParams().GetRect(); } - } else if (color_surface != nullptr) { + } else if (color_surface) { fb_rect = color_rect; - } else if (depth_surface != nullptr) { + } else if (depth_surface) { fb_rect = depth_rect; } - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - } - return std::make_tuple(color_surface, depth_surface, fb_rect); } -Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - UNREACHABLE(); - return {}; +void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { + surface->LoadGLBuffer(); + surface->UploadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); } -SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) { - MathUtil::Rectangle<u32> rect{}; +void RasterizerCacheOpenGL::MarkSurfaceAsDirty(const Surface& surface) { + if (Settings::values.use_accurate_framebuffers) { + // If enabled, always flush dirty surfaces + surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle); + surface->FlushGLBuffer(); + } else { + // Otherwise, don't mark surfaces that we write to as cached, because the resulting loads + // and flushes are very slow and do not seem to improve accuracy + const auto& params{surface->GetSurfaceParams()}; + Memory::RasterizerMarkRegionCached(params.addr, params.size_in_bytes, false); + } +} - Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>( - surface_cache, params, ScaleMatch::Ignore); +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { + if (params.addr == 0 || params.height * params.width == 0) { + return {}; + } - if (match_surface != nullptr) { - ValidateSurface(match_surface, params.addr, params.size); + const auto& gpu = Core::System::GetInstance().GPU(); + // Don't try to create any entries in the cache if the address of the texture is invalid. + if (gpu.memory_manager->GpuToCpuAddress(params.addr) == boost::none) + return {}; - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = - static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size); - match_subrect.stride = - static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size); - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); + // Check for an exact match in existing surfaces + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; + Surface surface; + if (search != surface_cache.end()) { + surface = search->second; + if (Settings::values.use_accurate_framebuffers) { + // Reload the surface from Switch memory + LoadSurface(surface); } - - rect = match_surface->GetScaledSubRect(match_subrect); + } else { + surface = std::make_shared<CachedSurface>(params); + RegisterSurface(surface); + LoadSurface(surface); } - return std::make_tuple(match_surface, rect); + return surface; } -void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; +Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr cpu_addr) const { + // Tries to find the GPU address of a framebuffer based on the CPU address. This is because + // final output framebuffers are specified by CPU address, but internally our GPU cache uses + // GPU addresses. We iterate through all cached framebuffers, and compare their starting CPU + // address to the one provided. This is obviously not great, and won't work if the + // framebuffer overlaps surfaces. + + std::vector<Surface> surfaces; + for (const auto& surface : surface_cache) { + const auto& params = surface.second->GetSurfaceParams(); + const VAddr surface_cpu_addr = params.GetCpuAddr(); + if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + params.size_in_bytes)) { + ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported"); + surfaces.push_back(surface.second); } } - for (auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} -void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, - u64 size) { - if (size == 0) - return; - - const SurfaceInterval validate_interval(addr, addr + size); - - if (surface->type == SurfaceType::Fill) { - // Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; + if (surfaces.empty()) { + return {}; } - while (true) { - const auto it = surface->invalid_regions.find(validate_interval); - if (it == surface->invalid_regions.end()) - break; - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = surface->FromInterval(interval); - - Surface copy_surface = - FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - surface->invalid_regions.erase(copy_interval); - continue; - } + ASSERT_MSG(surfaces.size() == 1, ">1 surface is unsupported"); - // Load data from Switch memory - FlushRegion(params.addr, params.size); - surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); - surface->invalid_regions.erase(params.GetInterval()); - } + return surfaces[0]; } -void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) { - if (size == 0) - return; - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to access - // that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface != nullptr && surface != flush_surface) - continue; +void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size*/) { + // TODO(bunnei): This is unused in the current implementation of the rasterizer cache. We should + // probably implement this in the future, but for now, the `use_accurate_framebufers` setting + // can be used to always flush. +} - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); +void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { + for (const auto& pair : surface_cache) { + const auto& surface{pair.second}; + const auto& params{surface->GetSurfaceParams()}; - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, - draw_framebuffer.handle); + if (params.IsOverlappingRegion(addr, size)) { + UnregisterSurface(surface); } - surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); - flushed_intervals += interval; } - // Reset dirty regions - dirty_regions -= flushed_intervals; } -void RasterizerCacheOpenGL::FlushAll() { - FlushRegion(0, Kernel::VMManager::MAX_ADDRESS); -} +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; -void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size, - const Surface& region_owner) { - if (size == 0) + if (search != surface_cache.end()) { + // Registered already return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->invalid_regions.erase(invalid_interval); - } - - for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - - // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures - if (cached_surface->type == SurfaceType::Fill && - cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } } - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); - } - - remove_surfaces.clear(); + surface_cache[surface_key] = surface; + UpdatePagesCachedCount(params.addr, params.size_in_bytes, 1); } -Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { - Surface surface = std::make_shared<CachedSurface>(); - static_cast<SurfaceParams&>(*surface) = params; - - surface->texture.Create(); - - surface->gl_buffer_size = 0; - surface->invalid_regions.insert(surface->GetInterval()); - AllocateSurfaceTexture(surface->texture.handle, - GetFormatTuple(surface->pixel_format, surface->component_type), - surface->GetScaledWidth(), surface->GetScaledHeight()); - - return surface; -} +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + const auto& params{surface->GetSurfaceParams()}; + const auto& surface_key{SurfaceKey::Create(params)}; + const auto& search{surface_cache.find(surface_key)}; -void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { - if (surface->registered) { + if (search == surface_cache.end()) { + // Unregistered already return; } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - UpdatePagesCachedCount(surface->addr, surface->size, 1); + + UpdatePagesCachedCount(params.addr, params.size_in_bytes, -1); + surface_cache.erase(search); } -void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { - if (!surface->registered) { - return; - } - surface->registered = false; - UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +template <typename Map, typename Interval> +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); } void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0f43e863d..1bedae992 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -1,57 +1,26 @@ -// Copyright 2015 Citra Emulator Project +// Copyright 2018 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once #include <array> +#include <map> #include <memory> -#include <set> -#include <tuple> -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-local-typedefs" -#endif +#include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/icl/interval_set.hpp> -#ifdef __GNUC__ -#pragma GCC diagnostic pop -#endif -#include <boost/optional.hpp> -#include <glad/glad.h> -#include "common/assert.h" -#include "common/common_funcs.h" #include "common/common_types.h" +#include "common/hash.h" #include "common/math_util.h" -#include "video_core/gpu.h" -#include "video_core/memory_manager.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/textures/texture.h" -struct CachedSurface; +class CachedSurface; using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSet = std::set<Surface>; - -using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>; -using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>; -using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>; - -using SurfaceInterval = SurfaceCache::interval_type; -static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && - std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), - "incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; - using PageMap = boost::icl::interval_map<u64, int>; -enum class ScaleMatch { - Exact, // only accept same res scale - Upscale, // only allow higher scale than params - Ignore // accept every scaled res -}; - struct SurfaceParams { enum class PixelFormat { ABGR8 = 0, @@ -61,12 +30,24 @@ struct SurfaceParams { R8 = 4, RGBA16F = 5, R11FG11FB10F = 6, - DXT1 = 7, - DXT23 = 8, - DXT45 = 9, - DXN1 = 10, // This is also known as BC4 + RGBA32UI = 7, + DXT1 = 8, + DXT23 = 9, + DXT45 = 10, + DXN1 = 11, // This is also known as BC4 + BC7U = 12, + ASTC_2D_4X4 = 13, - Max, + MaxColorFormat, + + // DepthStencil formats + Z24S8 = 14, + S8Z24 = 15, + Z32F = 16, + + MaxDepthStencilFormat, + + Max = MaxDepthStencilFormat, Invalid = 255, }; @@ -92,10 +73,10 @@ struct SurfaceParams { /** * Gets the compression factor for the specified PixelFormat. This applies to just the * "compressed width" and "compressed height", not the overall compression factor of a - * compressed image. This is used for maintaining proper surface sizes for compressed texture - * formats. + * compressed image. This is used for maintaining proper surface sizes for compressed + * texture formats. */ - static constexpr u32 GetCompresssionFactor(PixelFormat format) { + static constexpr u32 GetCompressionFactor(PixelFormat format) { if (format == PixelFormat::Invalid) return 0; @@ -107,18 +88,21 @@ struct SurfaceParams { 1, // R8 1, // RGBA16F 1, // R11FG11FB10F + 1, // RGBA32UI 4, // DXT1 4, // DXT23 4, // DXT45 4, // DXN1 + 4, // BC7U + 4, // ASTC_2D_4X4 + 1, // Z24S8 + 1, // S8Z24 + 1, // Z32F }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); return compression_factor_table[static_cast<size_t>(format)]; } - u32 GetCompresssionFactor() const { - return GetCompresssionFactor(pixel_format); - } static constexpr u32 GetFormatBpp(PixelFormat format) { if (format == PixelFormat::Invalid) @@ -132,10 +116,16 @@ struct SurfaceParams { 8, // R8 64, // RGBA16F 32, // R11FG11FB10F + 128, // RGBA32UI 64, // DXT1 128, // DXT23 128, // DXT45 64, // DXN1 + 128, // BC7U + 32, // ASTC_2D_4X4 + 32, // Z24S8 + 32, // S8Z24 + 32, // Z32F }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -145,6 +135,20 @@ struct SurfaceParams { return GetFormatBpp(pixel_format); } + static PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format) { + switch (format) { + case Tegra::DepthFormat::S8_Z24_UNORM: + return PixelFormat::S8Z24; + case Tegra::DepthFormat::Z24_S8_UNORM: + return PixelFormat::Z24S8; + case Tegra::DepthFormat::Z32_FLOAT: + return PixelFormat::Z32F; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); + } + } + static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { case Tegra::RenderTargetFormat::RGBA8_UNORM: @@ -156,18 +160,10 @@ struct SurfaceParams { return PixelFormat::RGBA16F; case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; + case Tegra::RenderTargetFormat::RGBA32_UINT: + return PixelFormat::RGBA32UI; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { - switch (format) { - case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -189,6 +185,8 @@ struct SurfaceParams { return PixelFormat::RGBA16F; case Tegra::Texture::TextureFormat::BF10GF11RF11: return PixelFormat::R11FG11FB10F; + case Tegra::Texture::TextureFormat::R32_G32_B32_A32: + return PixelFormat::RGBA32UI; case Tegra::Texture::TextureFormat::DXT1: return PixelFormat::DXT1; case Tegra::Texture::TextureFormat::DXT23: @@ -197,8 +195,12 @@ struct SurfaceParams { return PixelFormat::DXT45; case Tegra::Texture::TextureFormat::DXN1: return PixelFormat::DXN1; + case Tegra::Texture::TextureFormat::BC7U: + return PixelFormat::BC7U; + case Tegra::Texture::TextureFormat::ASTC_2D_4X4: + return PixelFormat::ASTC_2D_4X4; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } @@ -220,6 +222,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::R16_G16_B16_A16; case PixelFormat::R11FG11FB10F: return Tegra::Texture::TextureFormat::BF10GF11RF11; + case PixelFormat::RGBA32UI: + return Tegra::Texture::TextureFormat::R32_G32_B32_A32; case PixelFormat::DXT1: return Tegra::Texture::TextureFormat::DXT1; case PixelFormat::DXT23: @@ -228,6 +232,23 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::DXT45; case PixelFormat::DXN1: return Tegra::Texture::TextureFormat::DXN1; + case PixelFormat::BC7U: + return Tegra::Texture::TextureFormat::BC7U; + case PixelFormat::ASTC_2D_4X4: + return Tegra::Texture::TextureFormat::ASTC_2D_4X4; + default: + UNREACHABLE(); + } + } + + static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { + switch (format) { + case PixelFormat::S8Z24: + return Tegra::DepthFormat::S8_Z24_UNORM; + case PixelFormat::Z24S8: + return Tegra::DepthFormat::Z24_S8_UNORM; + case PixelFormat::Z32F: + return Tegra::DepthFormat::Z32_FLOAT; default: UNREACHABLE(); } @@ -239,7 +260,7 @@ struct SurfaceParams { case Tegra::Texture::ComponentType::UNORM: return ComponentType::UNorm; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); + LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type)); UNREACHABLE(); } } @@ -254,215 +275,153 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGBA16_FLOAT: case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return ComponentType::Float; + case Tegra::RenderTargetFormat::RGBA32_UINT: + return ComponentType::UInt; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } - static ComponentType ComponentTypeFromGPUPixelFormat( - Tegra::FramebufferConfig::PixelFormat format) { + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return ComponentType::UNorm; + return PixelFormat::ABGR8; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); } } - static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { - SurfaceType a_type = GetFormatType(pixel_format_a); - SurfaceType b_type = GetFormatType(pixel_format_b); - - if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) { - return true; - } - - if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { - return true; - } - - if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { - return true; + static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) { + switch (format) { + case Tegra::DepthFormat::S8_Z24_UNORM: + case Tegra::DepthFormat::Z24_S8_UNORM: + return ComponentType::UNorm; + case Tegra::DepthFormat::Z32_FLOAT: + return ComponentType::Float; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); + UNREACHABLE(); } - - return false; } static SurfaceType GetFormatType(PixelFormat pixel_format) { - if (static_cast<size_t>(pixel_format) < MaxPixelFormat) { + if (static_cast<size_t>(pixel_format) < static_cast<size_t>(PixelFormat::MaxColorFormat)) { return SurfaceType::ColorTexture; } + if (static_cast<size_t>(pixel_format) < + static_cast<size_t>(PixelFormat::MaxDepthStencilFormat)) { + return SurfaceType::DepthStencil; + } + // TODO(Subv): Implement the other formats ASSERT(false); return SurfaceType::Invalid; } - /// Update the params "size", "end" and "type" from the already set "addr", "width", "height" - /// and "pixel_format" - void UpdateParams() { - if (stride == 0) { - stride = width; - } - type = GetFormatType(pixel_format); - size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) - : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); - end = addr + size; - } - - SurfaceInterval GetInterval() const { - return SurfaceInterval::right_open(addr, end); - } - - // Returns the outer rectangle containing "interval" - SurfaceParams FromInterval(SurfaceInterval interval) const; - - SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; - - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - - /** - * Gets the actual width (in pixels) of the surface. This is provided because `width` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `width` is actually the compressed width. - */ - u32 GetActualWidth() const { - return width * GetCompresssionFactor(); - } - - /** - * Gets the actual height (in pixels) of the surface. This is provided because `height` is used - * for tracking the surface region in memory, which may be compressed for certain formats. In - * this scenario, `height` is actually the compressed height. - */ - u32 GetActualHeight() const { - return height * GetCompresssionFactor(); - } + /// Returns the rectangle corresponding to this surface + MathUtil::Rectangle<u32> GetRect() const; - u32 GetScaledWidth() const { - return width * res_scale; + /// Returns the size of this surface in bytes, adjusted for compression + size_t SizeInBytes() const { + const u32 compression_factor{GetCompressionFactor(pixel_format)}; + ASSERT(width % compression_factor == 0); + ASSERT(height % compression_factor == 0); + return (width / compression_factor) * (height / compression_factor) * + GetFormatBpp(pixel_format) / CHAR_BIT; } - u32 GetScaledHeight() const { - return height * res_scale; - } + /// Returns the CPU virtual address for this surface + VAddr GetCpuAddr() const; - MathUtil::Rectangle<u32> GetRect() const { - return {0, height, width, 0}; + /// Returns true if the specified region overlaps with this surface's region in Switch memory + bool IsOverlappingRegion(Tegra::GPUVAddr region_addr, size_t region_size) const { + return addr <= (region_addr + region_size) && region_addr <= (addr + size_in_bytes); } - MathUtil::Rectangle<u32> GetScaledRect() const { - return {0, GetScaledHeight(), GetScaledWidth(), 0}; - } + /// Creates SurfaceParams from a texture configuration + static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config); + + /// Creates SurfaceParams from a framebuffer configuration + static SurfaceParams CreateForFramebuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config); + + /// Creates SurfaceParams for a depth buffer configuration + static SurfaceParams CreateForDepthBuffer( + const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config, + Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format); + + Tegra::GPUVAddr addr; + bool is_tiled; + u32 block_height; + PixelFormat pixel_format; + ComponentType component_type; + SurfaceType type; + u32 width; + u32 height; + u32 unaligned_height; + size_t size_in_bytes; +}; - u64 PixelsInBytes(u64 size) const { - return size * CHAR_BIT / GetFormatBpp(pixel_format); +/// Hashable variation of SurfaceParams, used for a key in the surface cache +struct SurfaceKey : Common::HashableStruct<SurfaceParams> { + static SurfaceKey Create(const SurfaceParams& params) { + SurfaceKey res; + res.state = params; + return res; } +}; - u64 BytesInPixels(u64 pixels) const { - return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; +namespace std { +template <> +struct hash<SurfaceKey> { + size_t operator()(const SurfaceKey& k) const { + return k.Hash(); } - - VAddr GetCpuAddr() const; - - bool ExactMatch(const SurfaceParams& other_surface) const; - bool CanSubRect(const SurfaceParams& sub_surface) const; - bool CanExpand(const SurfaceParams& expanded_surface) const; - bool CanTexCopy(const SurfaceParams& texcopy_params) const; - - MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; - MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; - - Tegra::GPUVAddr addr = 0; - Tegra::GPUVAddr end = 0; - boost::optional<VAddr> cpu_addr; - u64 size = 0; - - u32 width = 0; - u32 height = 0; - u32 stride = 0; - u32 block_height = 0; - u16 res_scale = 1; - - bool is_tiled = false; - PixelFormat pixel_format = PixelFormat::Invalid; - SurfaceType type = SurfaceType::Invalid; - ComponentType component_type = ComponentType::Invalid; }; +} // namespace std -struct CachedSurface : SurfaceParams { - bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } +class CachedSurface final { +public: + CachedSurface(const SurfaceParams& params); - bool IsSurfaceFullyInvalid() const { - return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + const OGLTexture& Texture() const { + return texture; } - bool registered = false; - SurfaceRegions invalid_regions; - - u64 fill_size = 0; /// Number of bytes to read from fill_data - std::array<u8, 4> fill_data; - - OGLTexture texture; - - static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { - if (format == PixelFormat::Invalid) + static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) { + if (format == SurfaceParams::PixelFormat::Invalid) return 0; return SurfaceParams::GetFormatBpp(format) / CHAR_BIT; } - std::unique_ptr<u8[]> gl_buffer; - size_t gl_buffer_size = 0; + const SurfaceParams& GetSurfaceParams() const { + return params; + } // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end); - void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end); + void LoadGLBuffer(); + void FlushGLBuffer(); // Upload/Download data in gl_buffer in/to this surface's texture - void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); - void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, - GLuint draw_fb_handle); + void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle); + +private: + OGLTexture texture; + std::vector<u8> gl_buffer; + SurfaceParams params; }; -class RasterizerCacheOpenGL : NonCopyable { +class RasterizerCacheOpenGL final : NonCopyable { public: RasterizerCacheOpenGL(); ~RasterizerCacheOpenGL(); - /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, - const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); - - void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, - GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); - - /// Copy one surface's region to another - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval); - - /// Load a texture from Switch memory to OpenGL and cache it (if not already cached) - Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - - /// Tries to find a framebuffer GPU address based on the provided CPU address - boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const; - - /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from - /// Switch memory to OpenGL and caches it (if not already cached) - SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create); - /// Get a surface based on the texture configuration Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); @@ -470,29 +429,21 @@ public: SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport); - /// Get a surface that matches the fill config - Surface GetFillSurface(const void* config); + /// Marks the specified surface as "dirty", in that it is out of sync with Switch memory + void MarkSurfaceAsDirty(const Surface& surface); - /// Get a surface that matches a "texture copy" display transfer config - SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); + /// Tries to find a framebuffer GPU address based on the provided CPU address + Surface TryFindFramebufferSurface(VAddr cpu_addr) const; /// Write any cached resources overlapping the region back to memory (if dirty) - void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr); - - /// Mark region as being invalidated by region_owner (nullptr if Switch memory) - void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner); + void FlushRegion(Tegra::GPUVAddr addr, size_t size); - /// Flush all cached resources tracked by this cache manager - void FlushAll(); + /// Mark the specified region as being invalidated + void InvalidateRegion(Tegra::GPUVAddr addr, size_t size); private: - void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); - - /// Update surface's texture for given region when necessary - void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size); - - /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); + void LoadSurface(const Surface& surface); + Surface GetSurface(const SurfaceParams& params); /// Register surface into the cache void RegisterSurface(const Surface& surface); @@ -503,18 +454,9 @@ private: /// Increase/decrease the number of surface in pages touching the specified region void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta); - SurfaceCache surface_cache; + std::unordered_map<SurfaceKey, Surface> surface_cache; PageMap cached_pages; - SurfaceMap dirty_regions; - SurfaceSet remove_surfaces; OGLFramebuffer read_framebuffer; OGLFramebuffer draw_framebuffer; - - OGLVertexArray attributeless_vao; - OGLBuffer d24s8_abgr_buffer; - GLsizeiptr d24s8_abgr_buffer_size; - OGLProgram d24s8_abgr_shader; - GLint d24s8_abgr_tbo_size_u_id; - GLint d24s8_abgr_viewport_u_id; }; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 93f9172e7..0fed93ca5 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -38,7 +38,7 @@ public: if (handle == 0) return; glDeleteTextures(1, &handle); - OpenGLState::GetCurState().ResetTexture(handle).Apply(); + OpenGLState::GetCurState().UnbindTexture(handle).Apply(); handle = 0; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 67726e7c6..5914077e8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" namespace GLShader { @@ -16,6 +17,7 @@ namespace Decompiler { using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; +using Tegra::Shader::LogicOperation; using Tegra::Shader::OpCode; using Tegra::Shader::Register; using Tegra::Shader::Sampler; @@ -266,6 +268,27 @@ public: } /** + * Returns code that does an integer size conversion for the specified size. + * @param value Value to perform integer size conversion on. + * @param size Register size to use for conversion instructions. + * @returns GLSL string corresponding to the value converted to the specified size. + */ + static std::string ConvertIntegerSize(const std::string& value, Register::Size size) { + switch (size) { + case Register::Size::Byte: + return "((" + value + " << 24) >> 24)"; + case Register::Size::Short: + return "((" + value + " << 16) >> 16)"; + case Register::Size::Word: + // Default - do nothing + return value; + default: + LOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size)); + UNREACHABLE(); + } + } + + /** * Gets a register as an float. * @param reg The register to get. * @param elem The element to use for the operation. @@ -281,15 +304,18 @@ public: * @param reg The register to get. * @param elem The element to use for the operation. * @param is_signed Whether to get the register as a signed (or unsigned) integer. + * @param size Register size to use for conversion instructions. * @returns GLSL string corresponding to the register as an integer. */ - std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, - bool is_signed = true) { + std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true, + Register::Size size = Register::Size::Word) { const std::string func = GetGLSLConversionFunc( GLSLRegister::Type::Float, is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger); - return func + '(' + GetRegister(reg, elem) + ')'; + std::string value = func + '(' + GetRegister(reg, elem) + ')'; + + return ConvertIntegerSize(value, size); } /** @@ -299,13 +325,15 @@ public: * @param value The code representing the value to assign. * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. - * @param is_abs Optional, when True, applies absolute value to output. + * @param is_saturated Optional, when True, saturates the provided value. * @param dest_elem Optional, the destination element to use for the operation. */ void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value, - u64 dest_num_components, u64 value_num_components, bool is_abs = false, - u64 dest_elem = 0) { - SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem); + u64 dest_num_components, u64 value_num_components, + bool is_saturated = false, u64 dest_elem = 0) { + + SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value, + dest_num_components, value_num_components, dest_elem); } /** @@ -315,18 +343,22 @@ public: * @param value The code representing the value to assign. * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. - * @param is_abs Optional, when True, applies absolute value to output. + * @param is_saturated Optional, when True, saturates the provided value. * @param dest_elem Optional, the destination element to use for the operation. + * @param size Register size to use for conversion instructions. */ void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem, const std::string& value, u64 dest_num_components, - u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) { + u64 value_num_components, bool is_saturated = false, + u64 dest_elem = 0, Register::Size size = Register::Size::Word) { + ASSERT_MSG(!is_saturated, "Unimplemented"); + const std::string func = GetGLSLConversionFunc( is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger, GLSLRegister::Type::Float); - SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components, - is_abs, dest_elem); + SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')', + dest_num_components, value_num_components, dest_elem); } /** @@ -366,7 +398,8 @@ public: /// Generates code representing a uniform (C buffer) register, interpreted as the input type. std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { declr_const_buffers[index].MarkAsUsed(index, offset, stage); - std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']'; + std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + + std::to_string(offset % 4) + ']'; if (type == GLSLRegister::Type::Float) { return value; @@ -380,8 +413,12 @@ public: std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, GLSLRegister::Type type) { declr_const_buffers[index].MarkAsUsedIndirect(index, stage); - std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" + - GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]"; + + std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " + + std::to_string(offset) + ") / 4)"; + + std::string value = + 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]"; if (type == GLSLRegister::Type::Float) { return value; @@ -423,9 +460,10 @@ public: unsigned const_buffer_layout = 0; for (const auto& entry : GetConstBuffersDeclarations()) { - declarations.AddLine("layout(std430) buffer " + entry.GetName()); + declarations.AddLine("layout(std140) uniform " + entry.GetName()); declarations.AddLine('{'); - declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];"); + declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) + + "[MAX_CONSTBUFFER_ELEMENTS];"); declarations.AddLine("};"); declarations.AddNewLine(); ++const_buffer_layout; @@ -500,13 +538,11 @@ private: * @param value The code representing the value to assign. * @param dest_num_components Number of components in the destination. * @param value_num_components Number of components in the value. - * @param is_abs Optional, when True, applies absolute value to output. * @param dest_elem Optional, the destination element to use for the operation. */ void SetRegister(const Register& reg, u64 elem, const std::string& value, - u64 dest_num_components, u64 value_num_components, bool is_abs, - u64 dest_elem) { - std::string dest = GetRegister(reg, dest_elem); + u64 dest_num_components, u64 value_num_components, u64 dest_elem) { + std::string dest = GetRegister(reg, static_cast<u32>(dest_elem)); if (dest_num_components > 1) { dest += GetSwizzle(elem); } @@ -516,8 +552,6 @@ private: src += GetSwizzle(elem); } - src = is_abs ? "abs(" + src + ')' : src; - shader.AddLine(dest + " = " + src + ';'); } @@ -547,7 +581,7 @@ private: return "input_attribute_" + std::to_string(index); } - NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); + LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); UNREACHABLE(); } } @@ -565,7 +599,7 @@ private: return "output_attribute_" + std::to_string(index); } - NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); + LOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); UNREACHABLE(); } } @@ -685,21 +719,31 @@ private: /** * Returns the comparison string to use to compare two values in the 'set' family of * instructions. - * @params condition The condition used in the 'set'-family instruction. + * @param condition The condition used in the 'set'-family instruction. + * @param op_a First operand to use for the comparison. + * @param op_b Second operand to use for the comparison. * @returns String corresponding to the GLSL operator that matches the desired comparison. */ - std::string GetPredicateComparison(Tegra::Shader::PredCondition condition) const { + std::string GetPredicateComparison(Tegra::Shader::PredCondition condition, + const std::string& op_a, const std::string& op_b) const { using Tegra::Shader::PredCondition; static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = { - {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, - {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, - {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::LessThan, "<"}, {PredCondition::Equal, "=="}, + {PredCondition::LessEqual, "<="}, {PredCondition::GreaterThan, ">"}, + {PredCondition::NotEqual, "!="}, {PredCondition::GreaterEqual, ">="}, + {PredCondition::NotEqualWithNan, "!="}, }; - auto comparison = PredicateComparisonStrings.find(condition); + const auto& comparison{PredicateComparisonStrings.find(condition)}; ASSERT_MSG(comparison != PredicateComparisonStrings.end(), "Unknown predicate comparison operation"); - return comparison->second; + + std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'}; + if (condition == PredCondition::NotEqualWithNan) { + predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')'; + } + + return predicate; } /** @@ -733,6 +777,31 @@ private: return (absolute_offset % SchedPeriod) == 0; } + void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a, + const std::string& op_b) { + switch (logic_op) { + case LogicOperation::And: { + regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1); + break; + } + case LogicOperation::Or: { + regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1); + break; + } + case LogicOperation::Xor: { + regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1); + break; + } + case LogicOperation::PassB: { + regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1); + break; + } + default: + LOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op)); + UNREACHABLE(); + } + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -750,8 +819,9 @@ private: // Decoding failure if (!opcode) { - NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value); + LOG_CRITICAL(HW_GPU, "Unhandled instruction: {0:x}", instr.value); UNREACHABLE(); + return offset + 1; } shader.AddLine("// " + std::to_string(offset) + ": " + opcode->GetName()); @@ -770,22 +840,25 @@ private: switch (opcode->GetType()) { case OpCode::Type::Arithmetic: { - std::string op_a = instr.alu.negate_a ? "-" : ""; - op_a += regs.GetRegisterAsFloat(instr.gpr8); + std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); if (instr.alu.abs_a) { op_a = "abs(" + op_a + ')'; } - std::string op_b = instr.alu.negate_b ? "-" : ""; + if (instr.alu.negate_a) { + op_a = "-(" + op_a + ')'; + } + + std::string op_b; if (instr.is_b_imm) { - op_b += GetImmediate19(instr); + op_b = GetImmediate19(instr); } else { if (instr.is_b_gpr) { - op_b += regs.GetRegisterAsFloat(instr.gpr20); + op_b = regs.GetRegisterAsFloat(instr.gpr20); } else { - op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, - GLSLRegister::Type::Float); + op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); } } @@ -793,6 +866,10 @@ private: op_b = "abs(" + op_b + ')'; } + if (instr.alu.negate_b) { + op_b = "-(" + op_b + ')'; + } + switch (opcode->GetId()) { case OpCode::Id::MOV_C: case OpCode::Id::MOV_R: { @@ -800,68 +877,53 @@ private: break; } - case OpCode::Id::MOV32_IMM: { - // mov32i doesn't have abs or neg bits. - regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1); - break; - } case OpCode::Id::FMUL_C: case OpCode::Id::FMUL_R: case OpCode::Id::FMUL_IMM: { - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); - - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d); - break; - } - case OpCode::Id::FMUL32_IMM: { - // fmul32i doesn't have abs or neg bits. - regs.SetRegisterToFloat( - instr.gpr0, 0, - regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, + instr.alu.saturate_d); break; } case OpCode::Id::FADD_C: case OpCode::Id::FADD_R: case OpCode::Id::FADD_IMM: { - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); - - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, + instr.alu.saturate_d); break; } case OpCode::Id::MUFU: { - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); - switch (instr.sub_op) { case SubOp::Cos: regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1, - instr.alu.abs_d); + instr.alu.saturate_d); break; case SubOp::Sin: regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1, - instr.alu.abs_d); + instr.alu.saturate_d); break; case SubOp::Ex2: regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1, - instr.alu.abs_d); + instr.alu.saturate_d); break; case SubOp::Lg2: regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1, - instr.alu.abs_d); + instr.alu.saturate_d); break; case SubOp::Rcp: - regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d); + regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, + instr.alu.saturate_d); break; case SubOp::Rsq: regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1, - instr.alu.abs_d); + instr.alu.saturate_d); break; - case SubOp::Min: - regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1, - instr.alu.abs_d); + case SubOp::Sqrt: + regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1, + instr.alu.saturate_d); break; default: - NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", - static_cast<unsigned>(instr.sub_op.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}", + static_cast<unsigned>(instr.sub_op.Value())); UNREACHABLE(); } break; @@ -884,16 +946,31 @@ private: // Currently RRO is only implemented as a register move. // Usage of `abs_b` and `negate_b` here should also be correct. regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); - NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete"); + LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {}", opcode->GetName()); UNREACHABLE(); } } break; } + case OpCode::Type::ArithmeticImmediate: { + switch (opcode->GetId()) { + case OpCode::Id::MOV32_IMM: { + regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1); + break; + } + case OpCode::Id::FMUL32_IMM: { + regs.SetRegisterToFloat( + instr.gpr0, 0, + regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1); + break; + } + } + break; + } case OpCode::Type::Bfe: { ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented"); @@ -912,56 +989,13 @@ private: break; } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName()); UNREACHABLE(); } } break; } - case OpCode::Type::Logic: { - std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); - - if (instr.alu.lop.invert_a) - op_a = "~(" + op_a + ')'; - - switch (opcode->GetId()) { - case OpCode::Id::LOP32I: { - u32 imm = static_cast<u32>(instr.alu.imm20_32.Value()); - - if (instr.alu.lop.invert_b) - imm = ~imm; - - switch (instr.alu.lop.operation) { - case Tegra::Shader::LogicOperation::And: { - regs.SetRegisterToInteger(instr.gpr0, true, 0, - '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1); - break; - } - case Tegra::Shader::LogicOperation::Or: { - regs.SetRegisterToInteger(instr.gpr0, true, 0, - '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1); - break; - } - case Tegra::Shader::LogicOperation::Xor: { - regs.SetRegisterToInteger(instr.gpr0, true, 0, - '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1); - break; - } - default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}", - static_cast<u32>(instr.alu.lop.operation.Value())); - UNREACHABLE(); - } - break; - } - default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName()); - UNREACHABLE(); - } - } - break; - } case OpCode::Type::Shift: { std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true); @@ -998,21 +1032,46 @@ private: regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1); break; default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled shift instruction: {}", opcode->GetName()); UNREACHABLE(); } } break; } - case OpCode::Type::ArithmeticInteger: { + case OpCode::Type::ArithmeticIntegerImmediate: { std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); + std::string op_b = std::to_string(instr.alu.imm20_32.Value()); - if (instr.alu_integer.negate_a) - op_a = '-' + op_a; + switch (opcode->GetId()) { + case OpCode::Id::IADD32I: + if (instr.iadd32i.negate_a) + op_a = "-(" + op_a + ')'; - std::string op_b = instr.alu_integer.negate_b ? "-" : ""; + regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, + instr.iadd32i.saturate != 0); + break; + case OpCode::Id::LOP32I: { + if (instr.alu.lop32i.invert_a) + op_a = "~(" + op_a + ')'; + if (instr.alu.lop32i.invert_b) + op_b = "~(" + op_b + ')'; + + WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}", + opcode->GetName()); + UNREACHABLE(); + } + } + break; + } + case OpCode::Type::ArithmeticInteger: { + std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); + std::string op_b; if (instr.is_b_imm) { op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')'; } else { @@ -1028,22 +1087,63 @@ private: case OpCode::Id::IADD_C: case OpCode::Id::IADD_R: case OpCode::Id::IADD_IMM: { - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); - regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1); + if (instr.alu_integer.negate_a) + op_a = "-(" + op_a + ')'; + + if (instr.alu_integer.negate_b) + op_b = "-(" + op_b + ')'; + + regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1, + instr.alu.saturate_d); break; } case OpCode::Id::ISCADD_C: case OpCode::Id::ISCADD_R: case OpCode::Id::ISCADD_IMM: { + if (instr.alu_integer.negate_a) + op_a = "-(" + op_a + ')'; + + if (instr.alu_integer.negate_b) + op_b = "-(" + op_b + ')'; + std::string shift = std::to_string(instr.alu_integer.shift_amount.Value()); regs.SetRegisterToInteger(instr.gpr0, true, 0, "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1); break; } + case OpCode::Id::LOP_C: + case OpCode::Id::LOP_R: + case OpCode::Id::LOP_IMM: { + ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented"); + ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented"); + + if (instr.alu.lop.invert_a) + op_a = "~(" + op_a + ')'; + + if (instr.alu.lop.invert_b) + op_b = "~(" + op_b + ')'; + + WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b); + break; + } + case OpCode::Id::IMNMX_C: + case OpCode::Id::IMNMX_R: + case OpCode::Id::IMNMX_IMM: { + ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None, + "Unimplemented"); + std::string condition = + GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0); + std::string parameters = op_a + ',' + op_b; + regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0, + '(' + condition + ") ? min(" + parameters + ") : max(" + + parameters + ')', + 1, 1); + break; + } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", - opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}", + opcode->GetName()); UNREACHABLE(); } } @@ -1051,8 +1151,6 @@ private: break; } case OpCode::Type::Ffma: { - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); - std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); std::string op_b = instr.ffma.negate_b ? "-" : ""; std::string op_c = instr.ffma.negate_c ? "-" : ""; @@ -1081,38 +1179,38 @@ private: break; } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {}", opcode->GetName()); UNREACHABLE(); } } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1, + instr.alu.saturate_d); break; } case OpCode::Type::Conversion: { - ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented"); ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented"); - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); switch (opcode->GetId()) { case OpCode::Id::I2I_R: { ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); - std::string op_a = - regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed); + std::string op_a = regs.GetRegisterAsInteger( + instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1); + 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); break; } case OpCode::Id::I2F_R: { + ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); - std::string op_a = - regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed); + std::string op_a = regs.GetRegisterAsInteger( + instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; @@ -1122,13 +1220,16 @@ private: break; } case OpCode::Id::F2F_R: { - ASSERT_MSG(!instr.saturate_a, "Unimplemented"); - + ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); + ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); switch (instr.conversion.f2f.rounding) { case Tegra::Shader::F2fRoundingOp::None: break; + case Tegra::Shader::F2fRoundingOp::Round: + op_a = "roundEven(" + op_a + ')'; + break; case Tegra::Shader::F2fRoundingOp::Floor: op_a = "floor(" + op_a + ')'; break; @@ -1139,8 +1240,8 @@ private: op_a = "trunc(" + op_a + ')'; break; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}", - static_cast<u32>(instr.conversion.f2f.rounding.Value())); + LOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}", + static_cast<u32>(instr.conversion.f2f.rounding.Value())); UNREACHABLE(); break; } @@ -1149,10 +1250,11 @@ private: op_a = "abs(" + op_a + ')'; } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); break; } case OpCode::Id::F2I_R: { + ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); if (instr.conversion.abs_a) { @@ -1172,8 +1274,8 @@ private: op_a = "trunc(" + op_a + ')'; break; default: - NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}", - static_cast<u32>(instr.conversion.f2i.rounding.Value())); + LOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}", + static_cast<u32>(instr.conversion.f2i.rounding.Value())); UNREACHABLE(); break; } @@ -1185,11 +1287,11 @@ private: } regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, - 1); + 1, false, 0, instr.conversion.dest_size); break; } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName()); UNREACHABLE(); } } @@ -1224,8 +1326,8 @@ private: break; default: - NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}", - static_cast<unsigned>(instr.ld_c.type.Value())); + LOG_CRITICAL(HW_GPU, "Unhandled type: {}", + static_cast<unsigned>(instr.ld_c.type.Value())); UNREACHABLE(); } break; @@ -1298,7 +1400,7 @@ private: break; } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName()); UNREACHABLE(); } } @@ -1340,10 +1442,9 @@ private: std::string second_pred = GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.fsetp.cond); std::string combiner = GetPredicateCombiner(instr.fsetp.op); - std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')'; + std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b); // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.fsetp.pred3, '(' + predicate + ") " + combiner + " (" + second_pred + ')'); @@ -1378,10 +1479,9 @@ private: std::string second_pred = GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.isetp.cond); std::string combiner = GetPredicateCombiner(instr.isetp.op); - std::string predicate = '(' + op_a + ") " + comparator + " (" + op_b + ')'; + std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b); // Set the primary predicate to the result of Predicate OP SecondPredicate SetPredicate(instr.isetp.pred3, '(' + predicate + ") " + combiner + " (" + second_pred + ')'); @@ -1394,6 +1494,36 @@ private: } break; } + case OpCode::Type::PredicateSetPredicate: { + std::string op_a = + GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); + std::string op_b = + GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); + + using Tegra::Shader::Pred; + // We can't use the constant predicate as destination. + ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); + + std::string second_pred = + GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); + + std::string combiner = GetPredicateCombiner(instr.psetp.op); + + std::string predicate = + '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')'; + + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(instr.psetp.pred3, + '(' + predicate + ") " + combiner + " (" + second_pred + ')'); + + if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled + SetPredicate(instr.psetp.pred0, + "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); + } + break; + } case OpCode::Type::FloatSet: { std::string op_a = instr.fset.neg_a ? "-" : ""; op_a += regs.GetRegisterAsFloat(instr.gpr8); @@ -1428,11 +1558,10 @@ private: std::string second_pred = GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.fset.cond); std::string combiner = GetPredicateCombiner(instr.fset.op); - std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " + - combiner + " (" + second_pred + "))"; + std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) + + ") " + combiner + " (" + second_pred + "))"; if (instr.fset.bf) { regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); @@ -1463,11 +1592,10 @@ private: std::string second_pred = GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0); - std::string comparator = GetPredicateComparison(instr.iset.cond); std::string combiner = GetPredicateCombiner(instr.iset.op); - std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " + - combiner + " (" + second_pred + "))"; + std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) + + ") " + combiner + " (" + second_pred + "))"; if (instr.iset.bf) { regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1); @@ -1518,8 +1646,15 @@ private: // can ignore this when generating GLSL code. break; } + case OpCode::Id::DEPBAR: + case OpCode::Id::SYNC: { + // TODO(Subv): Find out if we actually have to care about these instructions or if + // the GLSL compiler takes care of that for us. + LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); + break; + } default: { - NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); + LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); UNREACHABLE(); } } @@ -1646,7 +1781,10 @@ private: }; // namespace Decompiler std::string GetCommonDeclarations() { - return "bool exec_shader();"; + std::string declarations = "bool exec_shader();\n"; + declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + + std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); + return declarations; } boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, @@ -1656,7 +1794,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, GLSLGenerator generator(subroutines, program_code, main_offset, stage); return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; } catch (const DecompileFail& exception) { - NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); + LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); } return boost::none; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index b88d592b7..c1e6fac9f 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -39,6 +39,10 @@ void main() { // Viewport can be flipped, which is unsupported by glViewport position.xy *= viewport_flip.xy; gl_Position = position; + + // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 + // For now, this is here to bring order in lieu of proper emulation + position.w = 1.0; } )"; out += program.first; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 7c00beb33..d7167b298 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -38,8 +38,8 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): Support more than one viewport - viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0; - viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0; + viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; + viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; } } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 8568fface..3c087d638 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -27,7 +27,7 @@ GLuint LoadShader(const char* source, GLenum type) { } GLuint shader_id = glCreateShader(type); glShaderSource(shader_id, 1, &source, nullptr); - NGLOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); + LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); glCompileShader(shader_id); GLint result = GL_FALSE; @@ -39,9 +39,9 @@ GLuint LoadShader(const char* source, GLenum type) { std::string shader_error(info_log_length, ' '); glGetShaderInfoLog(shader_id, info_log_length, nullptr, &shader_error[0]); if (result == GL_TRUE) { - NGLOG_DEBUG(Render_OpenGL, "{}", shader_error); + LOG_DEBUG(Render_OpenGL, "{}", shader_error); } else { - NGLOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); + LOG_ERROR(Render_OpenGL, "Error compiling {} shader:\n{}", debug_type, shader_error); } } return shader_id; diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 2036a06a9..0e4d782e2 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -29,7 +29,7 @@ void LogShaderSource(T... shaders) { std::string source(source_length, ' '); glGetShaderSource(shader, source_length, nullptr, &source[0]); - NGLOG_INFO(Render_OpenGL, "Shader source {}", source); + LOG_INFO(Render_OpenGL, "Shader source {}", source); } } @@ -49,7 +49,7 @@ GLuint LoadShader(const char* source, GLenum type); template <typename... T> GLuint LoadProgram(bool separable_program, T... shaders) { // Link the program - NGLOG_DEBUG(Render_OpenGL, "Linking program..."); + LOG_DEBUG(Render_OpenGL, "Linking program..."); GLuint program_id = glCreateProgram(); @@ -71,9 +71,9 @@ GLuint LoadProgram(bool separable_program, T... shaders) { std::string program_error(info_log_length, ' '); glGetProgramInfoLog(program_id, info_log_length, nullptr, &program_error[0]); if (result == GL_TRUE) { - NGLOG_DEBUG(Render_OpenGL, "{}", program_error); + LOG_DEBUG(Render_OpenGL, "{}", program_error); } else { - NGLOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); + LOG_ERROR(Render_OpenGL, "Error linking shader:\n{}", program_error); } } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 44f0c8a01..2e8a422a8 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -48,24 +48,9 @@ OpenGLState::OpenGLState() { logic_op = GL_COPY; for (auto& texture_unit : texture_units) { - texture_unit.texture_2d = 0; - texture_unit.sampler = 0; - texture_unit.swizzle.r = GL_RED; - texture_unit.swizzle.g = GL_GREEN; - texture_unit.swizzle.b = GL_BLUE; - texture_unit.swizzle.a = GL_ALPHA; + texture_unit.Reset(); } - lighting_lut.texture_buffer = 0; - - fog_lut.texture_buffer = 0; - - proctex_lut.texture_buffer = 0; - proctex_diff_lut.texture_buffer = 0; - proctex_color_map.texture_buffer = 0; - proctex_alpha_map.texture_buffer = 0; - proctex_noise_lut.texture_buffer = 0; - draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -196,13 +181,13 @@ void OpenGLState::Apply() const { } // Textures - for (size_t i = 0; i < std::size(texture_units); ++i) { + for (int i = 0; i < std::size(texture_units); ++i) { if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) { glActiveTexture(TextureUnits::MaxwellTexture(i).Enum()); glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d); } if (texture_units[i].sampler != cur_state.texture_units[i].sampler) { - glBindSampler(i, texture_units[i].sampler); + glBindSampler(static_cast<GLuint>(i), texture_units[i].sampler); } // Update the texture swizzle if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r || @@ -223,54 +208,12 @@ void OpenGLState::Apply() const { if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || current.ssbo != new_state.ssbo) { if (new_state.enabled) { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo); + glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); } } } } - // Lighting LUTs - if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { - glActiveTexture(TextureUnits::LightingLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); - } - - // Fog LUT - if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) { - glActiveTexture(TextureUnits::FogLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer); - } - - // ProcTex Noise LUT - if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer); - } - - // ProcTex Color Map - if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer); - } - - // ProcTex Alpha Map - if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer); - } - - // ProcTex LUT - if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer); - } - - // ProcTex Diff LUT - if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) { - glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); - } - // Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); @@ -338,26 +281,12 @@ void OpenGLState::Apply() const { cur_state = *this; } -OpenGLState& OpenGLState::ResetTexture(GLuint handle) { +OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { for (auto& unit : texture_units) { if (unit.texture_2d == handle) { - unit.texture_2d = 0; + unit.Unbind(); } } - if (lighting_lut.texture_buffer == handle) - lighting_lut.texture_buffer = 0; - if (fog_lut.texture_buffer == handle) - fog_lut.texture_buffer = 0; - if (proctex_noise_lut.texture_buffer == handle) - proctex_noise_lut.texture_buffer = 0; - if (proctex_color_map.texture_buffer == handle) - proctex_color_map.texture_buffer = 0; - if (proctex_alpha_map.texture_buffer == handle) - proctex_alpha_map.texture_buffer = 0; - if (proctex_lut.texture_buffer == handle) - proctex_lut.texture_buffer = 0; - if (proctex_diff_lut.texture_buffer == handle) - proctex_diff_lut.texture_buffer = 0; return *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 839e50e93..3398d7c04 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -91,35 +91,20 @@ public: GLint b; // GL_TEXTURE_SWIZZLE_B GLint a; // GL_TEXTURE_SWIZZLE_A } swizzle; - } texture_units[32]; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } lighting_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } fog_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_noise_lut; - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_color_map; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_alpha_map; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_lut; - - struct { - GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER - } proctex_diff_lut; + void Unbind() { + texture_2d = 0; + swizzle.r = GL_RED; + swizzle.g = GL_GREEN; + swizzle.b = GL_BLUE; + swizzle.a = GL_ALPHA; + } + + void Reset() { + Unbind(); + sampler = 0; + } + } texture_units[32]; struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING @@ -165,7 +150,7 @@ public: void Apply() const; /// Resets any references to the given resource - OpenGLState& ResetTexture(GLuint handle); + OpenGLState& UnbindTexture(GLuint handle); OpenGLState& ResetSampler(GLuint handle); OpenGLState& ResetProgram(GLuint handle); OpenGLState& ResetPipeline(GLuint handle); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 2155fb019..e19c3b280 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -29,9 +29,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16_16: + return GL_UNSIGNED_SHORT; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return GL_UNSIGNED_INT_2_10_10_10_REV; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); UNREACHABLE(); return {}; } @@ -41,9 +45,13 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16_16: + return GL_SHORT; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return GL_INT_2_10_10_10_REV; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); UNREACHABLE(); return {}; } @@ -52,7 +60,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString()); UNREACHABLE(); return {}; } @@ -66,7 +74,7 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { case Maxwell::IndexFormat::UnsignedInt: return GL_UNSIGNED_INT; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format)); UNREACHABLE(); return {}; } @@ -78,7 +86,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { case Maxwell::PrimitiveTopology::TriangleStrip: return GL_TRIANGLE_STRIP; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); return {}; } @@ -90,8 +98,8 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) { case Tegra::Texture::TextureFilter::Nearest: return GL_NEAREST; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}", - static_cast<u32>(filter_mode)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}", + static_cast<u32>(filter_mode)); UNREACHABLE(); return {}; } @@ -110,8 +118,7 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { // manually mix them. However the shader part of this is not yet implemented. return GL_CLAMP_TO_BORDER; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", - static_cast<u32>(wrap_mode)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}", static_cast<u32>(wrap_mode)); UNREACHABLE(); return {}; } @@ -129,7 +136,7 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { case Maxwell::Blend::Equation::Max: return GL_MAX; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation)); UNREACHABLE(); return {}; } @@ -175,7 +182,7 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { case Maxwell::Blend::Factor::OneMinusConstantAlpha: return GL_ONE_MINUS_CONSTANT_ALPHA; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); UNREACHABLE(); return {}; } @@ -196,7 +203,65 @@ inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { case Tegra::Texture::SwizzleSource::OneFloat: return GL_ONE; } - NGLOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); + LOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source)); + UNREACHABLE(); + return {}; +} + +inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { + switch (comparison) { + case Maxwell::ComparisonOp::Never: + case Maxwell::ComparisonOp::NeverOld: + return GL_NEVER; + case Maxwell::ComparisonOp::Less: + case Maxwell::ComparisonOp::LessOld: + return GL_LESS; + case Maxwell::ComparisonOp::Equal: + case Maxwell::ComparisonOp::EqualOld: + return GL_EQUAL; + case Maxwell::ComparisonOp::LessEqual: + case Maxwell::ComparisonOp::LessEqualOld: + return GL_LEQUAL; + case Maxwell::ComparisonOp::Greater: + case Maxwell::ComparisonOp::GreaterOld: + return GL_GREATER; + case Maxwell::ComparisonOp::NotEqual: + case Maxwell::ComparisonOp::NotEqualOld: + return GL_NOTEQUAL; + case Maxwell::ComparisonOp::GreaterEqual: + case Maxwell::ComparisonOp::GreaterEqualOld: + return GL_GEQUAL; + case Maxwell::ComparisonOp::Always: + case Maxwell::ComparisonOp::AlwaysOld: + return GL_ALWAYS; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented comparison op={}", static_cast<u32>(comparison)); + UNREACHABLE(); + return {}; +} + +inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { + switch (front_face) { + case Maxwell::Cull::FrontFace::ClockWise: + return GL_CW; + case Maxwell::Cull::FrontFace::CounterClockWise: + return GL_CCW; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); + UNREACHABLE(); + return {}; +} + +inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { + switch (cull_face) { + case Maxwell::Cull::CullFace::Front: + return GL_FRONT; + case Maxwell::Cull::CullFace::Back: + return GL_BACK; + case Maxwell::Cull::CullFace::FrontAndBack: + return GL_FRONT_AND_BACK; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index f33766bfd..00841e937 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -150,7 +150,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; - screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, Memory::FlushMode::Flush); @@ -302,8 +301,8 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, right = texcoords.left; } else { // Other transformations are unsupported - NGLOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", - static_cast<u32>(framebuffer_transform_flags)); + LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", + static_cast<u32>(framebuffer_transform_flags)); UNIMPLEMENTED(); } } @@ -405,14 +404,14 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum switch (severity) { case GL_DEBUG_SEVERITY_HIGH: - NGLOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message); + LOG_ERROR(Render_OpenGL, format, str_source, str_type, id, message); break; case GL_DEBUG_SEVERITY_MEDIUM: - NGLOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); + LOG_WARNING(Render_OpenGL, format, str_source, str_type, id, message); break; case GL_DEBUG_SEVERITY_NOTIFICATION: case GL_DEBUG_SEVERITY_LOW: - NGLOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); + LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); break; } } @@ -430,9 +429,9 @@ bool RendererOpenGL::Init() { const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; - NGLOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); - NGLOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); - NGLOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); + LOG_INFO(Render_OpenGL, "GL_VERSION: {}", gl_version); + LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); + LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 2cc6d9a00..21f0d298c 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -27,7 +27,7 @@ struct TextureInfo { /// Structure used for storing information about the display target for the Switch screen struct ScreenInfo { GLuint display_texture; - MathUtil::Rectangle<float> display_texcoords; + const MathUtil::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; TextureInfo texture; }; |