diff options
Diffstat (limited to 'src/video_core')
18 files changed, 299 insertions, 195 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 3c869d3a1..d03bc1c0c 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -311,6 +311,25 @@ public: AlwaysOld = 8, }; + enum class LogicOperation : u32 { + Clear = 0x1500, + And = 0x1501, + AndReverse = 0x1502, + Copy = 0x1503, + AndInverted = 0x1504, + NoOp = 0x1505, + Xor = 0x1506, + Or = 0x1507, + Nor = 0x1508, + Equiv = 0x1509, + Invert = 0x150A, + OrReverse = 0x150B, + CopyInverted = 0x150C, + OrInverted = 0x150D, + Nand = 0x150E, + Set = 0x150F, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -695,7 +714,14 @@ public: Cull cull; - INSERT_PADDING_WORDS(0x2B); + INSERT_PADDING_WORDS(0x28); + + struct { + u32 enable; + LogicOperation operation; + } logic_op; + + INSERT_PADDING_WORDS(0x1); union { u32 raw; @@ -942,6 +968,7 @@ ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(cull, 0x646); +ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); ASSERT_REG_POSITION(query, 0x6C0); ASSERT_REG_POSITION(vertex_array[0], 0x700); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 3ba6fe614..67194b0e3 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -477,7 +477,9 @@ union Instruction { if (texture_info >= 12 && texture_info <= 13) return TextureType::TextureCube; - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", + static_cast<u32>(texture_info.Value())); + UNREACHABLE(); } bool IsArrayTexture() const { @@ -516,14 +518,16 @@ union Instruction { return TextureType::Texture1D; } if (texture_info == 2 || texture_info == 8 || texture_info == 12 || - texture_info >= 4 && texture_info <= 6) { + (texture_info >= 4 && texture_info <= 6)) { return TextureType::Texture2D; } if (texture_info == 7) { return TextureType::Texture3D; } - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", + static_cast<u32>(texture_info.Value())); + UNREACHABLE(); } bool IsArrayTexture() const { diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 499e84b89..a4a219d8d 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -8,8 +8,6 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" -struct ScreenInfo; - namespace VideoCore { class RasterizerInterface { @@ -55,7 +53,7 @@ public: /// Attempt to use a faster method to display the framebuffer to screen virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, - u32 pixel_stride, ScreenInfo& screen_info) { + u32 pixel_stride) { return false; } diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index c1abdfbfe..be17a2b9c 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <memory> #include "core/frontend/emu_window.h" #include "core/settings.h" #include "video_core/renderer_base.h" @@ -17,18 +16,11 @@ RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{wi RendererBase::~RendererBase() = default; void RendererBase::RefreshBaseSettings() { - RefreshRasterizerSetting(); UpdateCurrentFramebufferLayout(); renderer_settings.use_framelimiter = Settings::values.use_frame_limit; } -void RendererBase::RefreshRasterizerSetting() { - if (rasterizer == nullptr) { - rasterizer = std::make_unique<RasterizerOpenGL>(render_window); - } -} - void RendererBase::UpdateCurrentFramebufferLayout() { const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index d9f16b8e6..2a357f9d0 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -58,9 +58,6 @@ public: void RefreshBaseSettings(); protected: - /// Refreshes settings specific to the rasterizer. - void RefreshRasterizerSetting(); - Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<RasterizerInterface> rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fe1f55e85..35056d9bd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,8 +36,8 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) - : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) + : emu_window{window}, screen_info{info}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -304,7 +304,8 @@ bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { } std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, - bool using_depth_fb) { + bool using_depth_fb, + bool preserve_contents) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { @@ -327,7 +328,7 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c Surface depth_surface; MathUtil::Rectangle<u32> surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents); const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; const MathUtil::Rectangle<u32> draw_rect{ @@ -390,7 +391,7 @@ void RasterizerOpenGL::Clear() { ScopeAcquireGLContext acquire_context{emu_window}; auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(use_color_fb, use_depth_fb); + ConfigureFramebuffers(use_color_fb, use_depth_fb, false); // TODO(Subv): Support clearing only partial colors. glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], @@ -445,10 +446,11 @@ void RasterizerOpenGL::DrawArrays() { ScopeAcquireGLContext acquire_context{emu_window}; auto [dirty_color_surface, dirty_depth_surface] = - ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0); + ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true); SyncDepthTestState(); SyncBlendState(); + SyncLogicOpState(); SyncCullMode(); // TODO(bunnei): Sync framebuffer_scale uniform here @@ -574,8 +576,7 @@ bool RasterizerOpenGL::AccelerateFill(const void* config) { } bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, - VAddr framebuffer_addr, u32 pixel_stride, - ScreenInfo& screen_info) { + VAddr framebuffer_addr, u32 pixel_stride) { if (!framebuffer_addr) { return {}; } @@ -847,6 +848,9 @@ void RasterizerOpenGL::SyncBlendState() { if (!state.blend.enabled) return; + ASSERT_MSG(regs.logic_op.enable == 0, + "Blending and logic op can't be enabled at the same time."); + ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented"); ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented"); state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb); @@ -856,3 +860,17 @@ void RasterizerOpenGL::SyncBlendState() { state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a); state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a); } + +void RasterizerOpenGL::SyncLogicOpState() { + const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + + // TODO(Subv): Support more than just render target 0. + state.logic_op.enabled = regs.logic_op.enable != 0; + + if (!state.logic_op.enabled) + return; + + ASSERT_MSG(regs.blend.enable == 0, "Blending and logic op can't be enabled at the same time."); + + state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 74307f626..f40e70bf4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -30,7 +30,7 @@ class EmuWindow; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer, ScreenInfo& info); ~RasterizerOpenGL() override; void DrawArrays() override; @@ -43,8 +43,8 @@ public: bool AccelerateDisplayTransfer(const void* config) override; bool AccelerateTextureCopy(const void* config) override; bool AccelerateFill(const void* config) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& framebuffer, VAddr framebuffer_addr, - u32 pixel_stride, ScreenInfo& screen_info) override; + bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, + u32 pixel_stride) override; bool AccelerateDrawBatch(bool is_indexed) override; /// OpenGL shader generated for a given Maxwell register state @@ -87,7 +87,8 @@ private: /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth> /// surfaces if writing was enabled. - std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb); + std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb, + bool preserve_contents); /// Binds the framebuffer color and depth surface void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, @@ -141,6 +142,9 @@ private: /// Syncs the blend state to match the guest state void SyncBlendState(); + /// Syncs the LogicOp state to match the guest state + void SyncLogicOpState(); + bool has_ARB_direct_state_access = false; bool has_ARB_separate_shader_objects = false; bool has_ARB_vertex_attrib_binding = false; @@ -151,6 +155,8 @@ private: Core::Frontend::EmuWindow& emu_window; + ScreenInfo& screen_info; + std::unique_ptr<GLShader::ProgramManager> shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index fb7476fb8..817fa07a8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -686,7 +686,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, - bool using_depth_fb) { + bool using_depth_fb, + bool preserve_contents) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): This is hard corded to use just the first render buffer @@ -708,7 +709,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin MathUtil::Rectangle<u32> color_rect{}; Surface color_surface; if (using_color_fb) { - color_surface = GetSurface(color_params); + color_surface = GetSurface(color_params, preserve_contents); if (color_surface) { color_rect = color_surface->GetSurfaceParams().GetRect(); } @@ -717,7 +718,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool usin MathUtil::Rectangle<u32> depth_rect{}; Surface depth_surface; if (using_depth_fb) { - depth_surface = GetSurface(depth_params); + depth_surface = GetSurface(depth_params, preserve_contents); if (depth_surface) { depth_rect = depth_surface->GetSurfaceParams().GetRect(); } @@ -752,7 +753,7 @@ void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) { surface->FlushGLBuffer(); } -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { +Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { if (params.addr == 0 || params.height * params.width == 0) { return {}; } @@ -774,9 +775,13 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { // Use the cached surface as-is return surface; - } else { - // If surface parameters changed, recreate the surface from the old one + } else if (preserve_contents) { + // If surface parameters changed and we care about keeping the previous data, recreate + // the surface from the old one return RecreateSurface(surface, params); + } else { + // Delete the old surface before creating a new one to prevent collisions. + UnregisterSurface(surface); } } @@ -793,12 +798,58 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface, // Verify surface is compatible for blitting const auto& params{surface->GetSurfaceParams()}; ASSERT(params.type == new_params.type); + ASSERT_MSG(params.GetCompressionFactor(params.pixel_format) == 1, + "Compressed texture reinterpretation is not supported"); // Create a new surface with the new parameters, and blit the previous surface to it Surface new_surface{std::make_shared<CachedSurface>(new_params)}; - BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle, - new_surface->GetSurfaceParams().GetRect(), params.type, read_framebuffer.handle, - draw_framebuffer.handle); + + auto source_format = GetFormatTuple(params.pixel_format, params.component_type); + auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type); + + size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes()); + + // Use a Pixel Buffer Object to download the previous texture and then upload it to the new one + // using the new format. + OGLBuffer pbo; + pbo.Create(); + + glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle); + glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB); + glGetTextureImage(surface->Texture().handle, 0, source_format.format, source_format.type, + params.SizeInBytes(), nullptr); + + // If the new texture is bigger than the previous one, we need to fill in the rest with data + // from the CPU. + if (params.SizeInBytes() < new_params.SizeInBytes()) { + // Upload the rest of the memory. + if (new_params.is_tiled) { + // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest of + // the data in this case. Games like Super Mario Odyssey seem to hit this case when + // drawing, it re-uses the memory of a previous texture as a bigger framebuffer but it + // doesn't clear it beforehand, the texture is already full of zeros. + LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during " + "reinterpretation but the texture is tiled."); + } + size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes(); + auto address = Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress( + new_params.addr + params.SizeInBytes()); + std::vector<u8> data(remaining_size); + Memory::ReadBlock(*address, data.data(), data.size()); + glBufferSubData(GL_PIXEL_PACK_BUFFER, params.SizeInBytes(), remaining_size, data.data()); + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + + const auto& dest_rect{new_params.GetRect()}; + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle); + glTextureSubImage2D( + new_surface->Texture().handle, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()), + static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format, dest_format.type, nullptr); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + pbo.Release(); // Update cache accordingly UnregisterSurface(surface); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index fc8b44219..907e7d606 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -722,7 +722,8 @@ public: Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + bool preserve_contents); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); @@ -738,7 +739,7 @@ public: private: void LoadSurface(const Surface& surface); - Surface GetSurface(const SurfaceParams& params); + Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); /// Recreates a surface with new parameters Surface RecreateSurface(const Surface& surface, const SurfaceParams& new_params); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ac6ccfec7..5b976b636 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -26,6 +26,7 @@ using Tegra::Shader::Sampler; using Tegra::Shader::SubOp; constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; +constexpr u32 PROGRAM_HEADER_SIZE = 0x50; class DecompileFail : public std::runtime_error { public: @@ -439,12 +440,13 @@ public: } declarations.AddNewLine(); - const auto& samplers = GetSamplers(); - for (const auto& sampler : samplers) { - declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() + - ';'); + // Append the sampler2D array for the used textures. + size_t num_samplers = GetSamplers().size(); + if (num_samplers > 0) { + declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' + + std::to_string(num_samplers) + "];"); + declarations.AddNewLine(); } - declarations.AddNewLine(); } /// Returns a list of constant buffer declarations @@ -456,14 +458,13 @@ public: } /// Returns a list of samplers used in the shader - const std::vector<SamplerEntry>& GetSamplers() const { + std::vector<SamplerEntry> GetSamplers() const { return used_samplers; } /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if /// necessary. - std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type, - bool is_array) { + std::string AccessSampler(const Sampler& sampler) { size_t offset = static_cast<size_t>(sampler.index.Value()); // If this sampler has already been used, return the existing mapping. @@ -472,13 +473,12 @@ public: [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; }); if (itr != used_samplers.end()) { - ASSERT(itr->GetType() == type && itr->IsArray() == is_array); return itr->GetName(); } // Otherwise create a new mapping for this sampler size_t next_index = used_samplers.size(); - SamplerEntry entry{stage, offset, next_index, type, is_array}; + SamplerEntry entry{stage, offset, next_index}; used_samplers.emplace_back(entry); return entry.GetName(); } @@ -621,6 +621,23 @@ public: } private: + // Shader program header for a Fragment Shader. + struct FragmentHeader { + INSERT_PADDING_WORDS(5); + INSERT_PADDING_WORDS(13); + u32 enabled_color_outputs; + union { + BitField<0, 1, u32> writes_samplemask; + BitField<1, 1, u32> writes_depth; + }; + + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { + u32 bit = render_target * 4 + component; + return enabled_color_outputs & (1 << bit); + } + }; + static_assert(sizeof(FragmentHeader) == PROGRAM_HEADER_SIZE, "FragmentHeader size is wrong"); + /// Gets the Subroutine object corresponding to the specified address. const Subroutine& GetSubroutine(u32 begin, u32 end) const { auto iter = subroutines.find(Subroutine{begin, end, suffix}); @@ -639,8 +656,8 @@ private: } /// Generates code representing a texture sampler. - std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) { - return regs.AccessSampler(sampler, type, is_array); + std::string GetSampler(const Sampler& sampler) { + return regs.AccessSampler(sampler); } /** @@ -894,6 +911,36 @@ private: shader.AddLine('}'); } + /// Writes the output values from a fragment shader to the corresponding GLSL output variables. + void EmitFragmentOutputsWrite() { + ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment); + FragmentHeader header; + std::memcpy(&header, program_code.data(), PROGRAM_HEADER_SIZE); + + ASSERT_MSG(header.writes_samplemask == 0, "Samplemask write is unimplemented"); + + // Write the color outputs using the data in the shader registers, disabled + // rendertargets/components are skipped in the register assignment. + u32 current_reg = 0; + for (u32 render_target = 0; render_target < Maxwell3D::Regs::NumRenderTargets; + ++render_target) { + // TODO(Subv): Figure out how dual-source blending is configured in the Switch. + for (u32 component = 0; component < 4; ++component) { + if (header.IsColorComponentOutputEnabled(render_target, component)) { + shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component, + regs.GetRegisterAsFloat(current_reg))); + ++current_reg; + } + } + } + + if (header.writes_depth) { + // The depth output is always 2 registers after the last color output, and current_reg + // already contains one past the last color register. + shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';'); + } + } + /** * Compiles a single instruction from Tegra to GLSL. * @param offset the offset of the Tegra shader instruction. @@ -1508,29 +1555,10 @@ private: break; } case OpCode::Id::TEX: { - ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented"); - std::string coord{}; - - switch (instr.tex.texture_type) { - case Tegra::Shader::TextureType::Texture2D: { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - break; - } - case Tegra::Shader::TextureType::Texture3D: { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - break; - } - default: - UNIMPLEMENTED(); - } - - const std::string sampler = - GetSampler(instr.sampler, instr.tex.texture_type, instr.tex.array); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. shader.AddLine("{"); @@ -1552,60 +1580,20 @@ private: break; } case OpCode::Id::TEXS: { - std::string coord{}; - - switch (instr.texs.GetTextureType()) { - case Tegra::Shader::TextureType::Texture2D: { - if (instr.texs.IsArrayTexture()) { - std::string index = regs.GetRegisterAsInteger(instr.gpr8); - std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");"; - } else { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec2 coords = vec2(" + x + ", " + y + ");"; - } - break; - } - case Tegra::Shader::TextureType::TextureCube: { - std::string x = regs.GetRegisterAsFloat(instr.gpr8); - std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); - std::string z = regs.GetRegisterAsFloat(instr.gpr20); - coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");"; - break; - } - default: - UNIMPLEMENTED(); - } - const std::string sampler = GetSampler(instr.sampler, instr.texs.GetTextureType(), - instr.texs.IsArrayTexture()); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; const std::string texture = "texture(" + sampler + ", coords)"; WriteTexsInstruction(instr, coord, texture); break; } case OpCode::Id::TLDS: { - ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D); - ASSERT(instr.tlds.IsArrayTexture() == false); - std::string coord{}; - - switch (instr.tlds.GetTextureType()) { - case Tegra::Shader::TextureType::Texture2D: { - if (instr.tlds.IsArrayTexture()) { - UNIMPLEMENTED(); - } else { - std::string x = regs.GetRegisterAsInteger(instr.gpr8); - std::string y = regs.GetRegisterAsInteger(instr.gpr20); - coord = "ivec2 coords = ivec2(" + x + ", " + y + ");"; - } - break; - } - default: - UNIMPLEMENTED(); - } - const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(), - instr.tlds.IsArrayTexture()); + const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8); + const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");"; const std::string texture = "texelFetch(" + sampler + ", coords, 0)"; WriteTexsInstruction(instr, coord, texture); break; @@ -1623,11 +1611,12 @@ private: break; } default: - UNIMPLEMENTED(); + LOG_CRITICAL(HW_GPU, "Unhandled texture type {}", + static_cast<u32>(instr.tld4.texture_type.Value())); + UNREACHABLE(); } - const std::string sampler = - GetSampler(instr.sampler, instr.tld4.texture_type, instr.tld4.array); + const std::string sampler = GetSampler(instr.sampler); // Add an extra scope and declare the texture coords inside to prevent // overwriting them in case they are used as outputs of the texs instruction. shader.AddLine("{"); @@ -1653,8 +1642,7 @@ private: const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. - const std::string sampler = - GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false); + const std::string sampler = GetSampler(instr.sampler); const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; const std::string texture = "textureGather(" + sampler + ", coords, " + std::to_string(instr.tld4s.component) + ')'; @@ -1960,12 +1948,8 @@ private: default: { switch (opcode->GetId()) { case OpCode::Id::EXIT: { - // Final color output is currently hardcoded to GPR0-3 for fragment shaders if (stage == Maxwell3D::Regs::ShaderStage::Fragment) { - shader.AddLine("color.r = " + regs.GetRegisterAsFloat(0) + ';'); - shader.AddLine("color.g = " + regs.GetRegisterAsFloat(1) + ';'); - shader.AddLine("color.b = " + regs.GetRegisterAsFloat(2) + ';'); - shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';'); + EmitFragmentOutputsWrite(); } switch (instr.flow.cond) { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 57e0e1726..01c7b9720 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -87,7 +87,7 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo .get_value_or({}); out += R"( in vec4 position; -out vec4 color; +layout(location = 0) out vec4 color[8]; layout (std140) uniform fs_config { vec4 viewport_flip; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index db48da645..4729ce0fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -11,7 +11,6 @@ #include <vector> #include "common/common_types.h" #include "common/hash.h" -#include "video_core/engines/shader_bytecode.h" namespace GLShader { @@ -73,9 +72,8 @@ class SamplerEntry { using Maxwell = Tegra::Engines::Maxwell3D::Regs; public: - SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index, - Tegra::Shader::TextureType type, bool is_array) - : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {} + SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index) + : offset(offset), stage(stage), sampler_index(index) {} size_t GetOffset() const { return offset; @@ -90,41 +88,8 @@ public: } std::string GetName() const { - return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' + - std::to_string(sampler_index); - } - - std::string GetTypeString() const { - using Tegra::Shader::TextureType; - std::string glsl_type; - - switch (type) { - case TextureType::Texture1D: - glsl_type = "sampler1D"; - break; - case TextureType::Texture2D: - glsl_type = "sampler2D"; - break; - case TextureType::Texture3D: - glsl_type = "sampler3D"; - break; - case TextureType::TextureCube: - glsl_type = "samplerCube"; - break; - default: - UNIMPLEMENTED(); - } - if (is_array) - glsl_type += "Array"; - return glsl_type; - } - - Tegra::Shader::TextureType GetType() const { - return type; - } - - bool IsArray() const { - return is_array; + return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' + + std::to_string(sampler_index) + ']'; } static std::string GetArrayName(Maxwell::ShaderStage stage) { @@ -135,14 +100,11 @@ private: static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = { "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs", }; - /// Offset in TSC memory from which to read the sampler object, as specified by the sampling /// instruction. size_t offset; - Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. - size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. - Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc) - bool is_array; ///< Whether the texture is being sampled as an array texture or not. + Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used. + size_t sampler_index; ///< Value used to index into the generated GLSL sampler array. }; struct ShaderEntries { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 1d1975179..13399ceb8 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -45,7 +45,8 @@ OpenGLState::OpenGLState() { blend.color.blue = 0.0f; blend.color.alpha = 0.0f; - logic_op = GL_COPY; + logic_op.enabled = false; + logic_op.operation = GL_COPY; for (auto& texture_unit : texture_units) { texture_unit.Reset(); @@ -148,11 +149,10 @@ void OpenGLState::Apply() const { // Blending if (blend.enabled != cur_state.blend.enabled) { if (blend.enabled) { + ASSERT(!logic_op.enabled); glEnable(GL_BLEND); - glDisable(GL_COLOR_LOGIC_OP); } else { glDisable(GL_BLEND); - glEnable(GL_COLOR_LOGIC_OP); } } @@ -176,8 +176,18 @@ void OpenGLState::Apply() const { glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); } - if (logic_op != cur_state.logic_op) { - glLogicOp(logic_op); + // Logic Operation + if (logic_op.enabled != cur_state.logic_op.enabled) { + if (logic_op.enabled) { + ASSERT(!blend.enabled); + glEnable(GL_COLOR_LOGIC_OP); + } else { + glDisable(GL_COLOR_LOGIC_OP); + } + } + + if (logic_op.operation != cur_state.logic_op.operation) { + glLogicOp(logic_op.operation); } // Textures diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index bdb02ba25..219b65a8a 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -83,7 +83,10 @@ public: } color; // GL_BLEND_COLOR } blend; - GLenum logic_op; // GL_LOGIC_OP_MODE + struct { + bool enabled; // GL_LOGIC_OP_MODE + GLenum operation; + } logic_op; // 3 texture units - one for each that is used in PICA fragment shader emulation struct TextureUnit { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 45592daaf..3b0cdf6bc 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#pragma once + #include <tuple> #include <glad/glad.h> #include "common/common_types.h" diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 5d91a0c2f..0d55b3e17 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -107,6 +107,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { case Maxwell::PrimitiveTopology::Points: return GL_POINTS; + case Maxwell::PrimitiveTopology::Lines: + return GL_LINES; case Maxwell::PrimitiveTopology::LineStrip: return GL_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: @@ -317,4 +319,44 @@ inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { return {}; } +inline GLenum LogicOp(Maxwell::LogicOperation operation) { + switch (operation) { + case Maxwell::LogicOperation::Clear: + return GL_CLEAR; + case Maxwell::LogicOperation::And: + return GL_AND; + case Maxwell::LogicOperation::AndReverse: + return GL_AND_REVERSE; + case Maxwell::LogicOperation::Copy: + return GL_COPY; + case Maxwell::LogicOperation::AndInverted: + return GL_AND_INVERTED; + case Maxwell::LogicOperation::NoOp: + return GL_NOOP; + case Maxwell::LogicOperation::Xor: + return GL_XOR; + case Maxwell::LogicOperation::Or: + return GL_OR; + case Maxwell::LogicOperation::Nor: + return GL_NOR; + case Maxwell::LogicOperation::Equiv: + return GL_EQUIV; + case Maxwell::LogicOperation::Invert: + return GL_INVERT; + case Maxwell::LogicOperation::OrReverse: + return GL_OR_REVERSE; + case Maxwell::LogicOperation::CopyInverted: + return GL_COPY_INVERTED; + case Maxwell::LogicOperation::OrInverted: + return GL_OR_INVERTED; + case Maxwell::LogicOperation::Nand: + return GL_NAND; + case Maxwell::LogicOperation::Set: + return GL_SET; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented logic operation={}", static_cast<u32>(operation)); + UNREACHABLE(); + return {}; +} + } // namespace MaxwellToGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bf30eda6d..eef13dddc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -16,6 +16,7 @@ #include "core/memory.h" #include "core/settings.h" #include "core/tracer/recorder.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/utils.h" @@ -130,7 +131,7 @@ void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig& } // Load the framebuffer from memory, draw it to the screen, and swap buffers - LoadFBToScreenInfo(*framebuffer, screen_info); + LoadFBToScreenInfo(*framebuffer); DrawScreen(); render_window.SwapBuffers(); } @@ -142,14 +143,12 @@ void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig& // Restore the rasterizer state prev_state.Apply(); - RefreshRasterizerSetting(); } /** * Loads framebuffer from emulated memory into the active OpenGL texture. */ -void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, - ScreenInfo& screen_info) { +void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)}; const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; @@ -162,8 +161,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf // only allows rows to have a memory alignement of 4. ASSERT(framebuffer.stride % 4 == 0); - if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride, - screen_info)) { + if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; @@ -276,6 +274,14 @@ void RendererOpenGL::InitOpenGLObjects() { LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); } +void RendererOpenGL::CreateRasterizer() { + if (rasterizer) { + return; + } + + rasterizer = std::make_unique<RasterizerOpenGL>(render_window, screen_info); +} + void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer) { @@ -432,7 +438,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum break; case GL_DEBUG_SEVERITY_NOTIFICATION: case GL_DEBUG_SEVERITY_LOW: - LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message); + LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); break; } } @@ -463,8 +469,7 @@ bool RendererOpenGL::Init() { } InitOpenGLObjects(); - - RefreshRasterizerSetting(); + CreateRasterizer(); return true; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index a5eab6997..7ae103f04 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -59,6 +59,8 @@ public: private: void InitOpenGLObjects(); + void CreateRasterizer(); + void ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer); void DrawScreen(); @@ -66,7 +68,7 @@ private: void UpdateFramerate(); // Loads framebuffer from emulated memory into the display information structure - void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer, ScreenInfo& screen_info); + void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); // Fills active OpenGL texture with the given RGBA color. void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); |