diff options
Diffstat (limited to 'src/video_core')
25 files changed, 914 insertions, 663 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 31ea3adad..dc485e811 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -29,10 +29,10 @@ enum class BufferMethods { }; void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) { - LOG_WARNING(HW_GPU, - "Processing method {:08X} on subchannel {} value " - "{:08X} remaining params {}", - method, subchannel, value, remaining_params); + LOG_TRACE(HW_GPU, + "Processing method {:08X} on subchannel {} value " + "{:08X} remaining params {}", + method, subchannel, value, remaining_params); if (method == static_cast<u32>(BufferMethods::BindObject)) { // Bind the current subchannel to the desired engine id. diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index f7214ffec..a01153e0b 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -30,8 +30,7 @@ union CommandHeader { BitField<29, 3, SubmissionMode> mode; }; -static_assert(std::is_standard_layout<CommandHeader>::value == true, - "CommandHeader does not use standard layout"); +static_assert(std::is_standard_layout_v<CommandHeader>, "CommandHeader is not standard layout"); static_assert(sizeof(CommandHeader) == sizeof(u32), "CommandHeader has incorrect size!"); } // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index a235b543e..a46ed4bd7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -23,12 +23,17 @@ Maxwell3D::Maxwell3D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& : memory_manager(memory_manager), rasterizer{rasterizer}, macro_interpreter(*this) {} void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { - auto macro_code = uploaded_macros.find(method); + // Reset the current macro. + executing_macro = 0; + // The requested macro must have been uploaded already. - ASSERT_MSG(macro_code != uploaded_macros.end(), "Macro %08X was not uploaded", method); + auto macro_code = uploaded_macros.find(method); + if (macro_code == uploaded_macros.end()) { + LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method); + return; + } - // Reset the current macro and execute it. - executing_macro = 0; + // Execute the current macro. macro_interpreter.Execute(macro_code->second, std::move(parameters)); } @@ -238,6 +243,8 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) { auto& buffer = shader.const_buffers[bind_data.index]; + ASSERT(bind_data.index < Regs::MaxConstBuffers); + buffer.enabled = bind_data.valid.Value() != 0; buffer.index = bind_data.index; buffer.address = regs.const_buffer.BufferAddress(); @@ -285,8 +292,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { // TODO(Subv): Different data types for separate components are not supported ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); - // TODO(Subv): Only UNORM formats are supported for now. - ASSERT(r_type == Texture::ComponentType::UNORM); return tic_entry; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 4d0ff96a5..1b30ce018 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -44,7 +44,7 @@ public: static constexpr size_t MaxShaderProgram = 6; static constexpr size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. - static constexpr size_t MaxConstBuffers = 16; + static constexpr size_t MaxConstBuffers = 18; enum class QueryMode : u32 { Write = 0, @@ -93,6 +93,7 @@ public: struct VertexAttribute { enum class Size : u32 { + Invalid = 0x0, Size_32_32_32_32 = 0x01, Size_32_32_32 = 0x02, Size_16_16_16_16 = 0x03, @@ -257,6 +258,10 @@ public: bool IsNormalized() const { return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); } + + bool IsValid() const { + return size != Size::Invalid; + } }; enum class PrimitiveTopology : u32 { @@ -352,6 +357,27 @@ public: OneMinusConstantColor = 0x62, ConstantAlpha = 0x63, OneMinusConstantAlpha = 0x64, + + // These values are used by Nouveau and some games. + ZeroGL = 0x4000, + OneGL = 0x4001, + SourceColorGL = 0x4300, + OneMinusSourceColorGL = 0x4301, + SourceAlphaGL = 0x4302, + OneMinusSourceAlphaGL = 0x4303, + DestAlphaGL = 0x4304, + OneMinusDestAlphaGL = 0x4305, + DestColorGL = 0x4306, + OneMinusDestColorGL = 0x4307, + SourceAlphaSaturateGL = 0x4308, + ConstantColorGL = 0xc001, + OneMinusConstantColorGL = 0xc002, + ConstantAlphaGL = 0xc003, + OneMinusConstantAlphaGL = 0xc004, + Source1ColorGL = 0xc900, + OneMinusSource1ColorGL = 0xc901, + Source1AlphaGL = 0xc902, + OneMinusSource1AlphaGL = 0xc903, }; u32 separate_alpha; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index c7e3fb4b1..2526ebf28 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -74,6 +74,7 @@ union Attribute { enum class Index : u64 { Position = 7, Attribute_0 = 8, + Attribute_31 = 39, // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a Tess Eval // shader. @@ -199,6 +200,14 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class XmadMode : u64 { + None = 0, + CLo = 1, + CHi = 2, + CSfu = 3, + CBcc = 4, +}; + enum class FlowCondition : u64 { Always = 0xF, Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for? @@ -254,20 +263,15 @@ union Instruction { BitField<56, 1, u64> invert_b; } lop32i; - float GetImm20_19() const { - float result{}; + u32 GetImm20_19() const { u32 imm{static_cast<u32>(imm20_19)}; imm <<= 12; imm |= negate_imm ? 0x80000000 : 0; - std::memcpy(&result, &imm, sizeof(imm)); - return result; + return imm; } - float GetImm20_32() const { - float result{}; - s32 imm{static_cast<s32>(imm20_32)}; - std::memcpy(&result, &imm, sizeof(imm)); - return result; + u32 GetImm20_32() const { + return static_cast<u32>(imm20_32); } s32 GetSignedImm20_20() const { @@ -461,6 +465,18 @@ union Instruction { } bra; union { + BitField<20, 16, u64> imm20_16; + BitField<36, 1, u64> product_shift_left; + BitField<37, 1, u64> merge_37; + BitField<48, 1, u64> sign_a; + BitField<49, 1, u64> sign_b; + BitField<50, 3, XmadMode> mode; + BitField<52, 1, u64> high_b; + BitField<53, 1, u64> high_a; + BitField<56, 1, u64> merge_56; + } xmad; + + union { BitField<20, 14, u64> offset; BitField<34, 5, u64> index; } cbuf34; @@ -480,8 +496,7 @@ union Instruction { u64 value; }; static_assert(sizeof(Instruction) == 0x8, "Incorrect structure size"); -static_assert(std::is_standard_layout<Instruction>::value, - "Structure does not have standard layout"); +static_assert(std::is_standard_layout_v<Instruction>, "Instruction is not standard layout"); class OpCode { public: @@ -598,9 +613,17 @@ public: IntegerSetPredicate, PredicateSetPredicate, Conversion, + Xmad, Unknown, }; + /// Returns whether an opcode has an execution predicate field or not (ie, whether it can be + /// conditionally executed). + static bool IsPredicatedInstruction(Id opcode) { + // TODO(Subv): Add the rest of unpredicated instructions. + return opcode != Id::SSY; + } + class Matcher { public: Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type) @@ -780,10 +803,10 @@ private: INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"), INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"), INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), - INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"), - INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"), - INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"), - INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"), + INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), + INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), + INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), + INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"), }; #undef INST std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b2a83ce0b..5a593c1f7 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/assert.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_compute.h" @@ -11,6 +12,15 @@ namespace Tegra { +u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { + switch (format) { + case PixelFormat::ABGR8: + return 4; + } + + UNREACHABLE(); +} + GPU::GPU(VideoCore::RasterizerInterface& rasterizer) { memory_manager = std::make_unique<MemoryManager>(); maxwell_3d = std::make_unique<Engines::Maxwell3D>(rasterizer, *memory_manager); @@ -34,18 +44,59 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { switch (format) { case RenderTargetFormat::RGBA32_FLOAT: + case RenderTargetFormat::RGBA32_UINT: return 16; + case RenderTargetFormat::RGBA16_UINT: + case RenderTargetFormat::RGBA16_UNORM: case RenderTargetFormat::RGBA16_FLOAT: case RenderTargetFormat::RG32_FLOAT: + case RenderTargetFormat::RG32_UINT: return 8; case RenderTargetFormat::RGBA8_UNORM: + case RenderTargetFormat::RGBA8_SNORM: + case RenderTargetFormat::RGBA8_SRGB: case RenderTargetFormat::RGB10_A2_UNORM: case RenderTargetFormat::BGRA8_UNORM: + case RenderTargetFormat::RG16_UNORM: + case RenderTargetFormat::RG16_SNORM: + case RenderTargetFormat::RG16_UINT: + case RenderTargetFormat::RG16_SINT: + case RenderTargetFormat::RG16_FLOAT: case RenderTargetFormat::R32_FLOAT: + case RenderTargetFormat::R11G11B10_FLOAT: + case RenderTargetFormat::R32_UINT: return 4; + case RenderTargetFormat::R16_UNORM: + case RenderTargetFormat::R16_SNORM: + case RenderTargetFormat::R16_UINT: + case RenderTargetFormat::R16_SINT: + case RenderTargetFormat::R16_FLOAT: + case RenderTargetFormat::RG8_UNORM: + case RenderTargetFormat::RG8_SNORM: + return 2; + case RenderTargetFormat::R8_UNORM: + case RenderTargetFormat::R8_UINT: + return 1; default: UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format)); } } +u32 DepthFormatBytesPerPixel(DepthFormat format) { + switch (format) { + case DepthFormat::Z32_S8_X24_FLOAT: + return 8; + case DepthFormat::Z32_FLOAT: + case DepthFormat::S8_Z24_UNORM: + case DepthFormat::Z24_X8_UNORM: + case DepthFormat::Z24_S8_UNORM: + case DepthFormat::Z24_C8_UNORM: + return 4; + case DepthFormat::Z16_UNORM: + return 2; + default: + UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format)); + } +} + } // namespace Tegra diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 440505c9d..97dcccb92 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -6,7 +6,6 @@ #include <memory> #include <unordered_map> -#include <vector> #include "common/common_types.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/memory_manager.h" @@ -21,21 +20,34 @@ enum class RenderTargetFormat : u32 { NONE = 0x0, RGBA32_FLOAT = 0xC0, RGBA32_UINT = 0xC2, + RGBA16_UNORM = 0xC6, + RGBA16_UINT = 0xC9, RGBA16_FLOAT = 0xCA, RG32_FLOAT = 0xCB, + RG32_UINT = 0xCD, BGRA8_UNORM = 0xCF, RGB10_A2_UNORM = 0xD1, RGBA8_UNORM = 0xD5, RGBA8_SRGB = 0xD6, + RGBA8_SNORM = 0xD7, RG16_UNORM = 0xDA, RG16_SNORM = 0xDB, RG16_SINT = 0xDC, RG16_UINT = 0xDD, RG16_FLOAT = 0xDE, R11G11B10_FLOAT = 0xE0, + R32_UINT = 0xE4, R32_FLOAT = 0xE5, + B5G6R5_UNORM = 0xE8, + RG8_UNORM = 0xEA, + RG8_SNORM = 0xEB, + R16_UNORM = 0xEE, + R16_SNORM = 0xEF, + R16_SINT = 0xF0, + R16_UINT = 0xF1, R16_FLOAT = 0xF2, R8_UNORM = 0xF3, + R8_UINT = 0xF6, }; enum class DepthFormat : u32 { @@ -51,6 +63,9 @@ enum class DepthFormat : u32 { /// Returns the number of bytes per pixel of each rendertarget format. u32 RenderTargetBytesPerPixel(RenderTargetFormat format); +/// Returns the number of bytes per pixel of each depth format. +u32 DepthFormatBytesPerPixel(DepthFormat format); + class DebugContext; /** @@ -64,14 +79,7 @@ struct FramebufferConfig { /** * Returns the number of bytes per pixel. */ - static u32 BytesPerPixel(PixelFormat format) { - switch (format) { - case PixelFormat::ABGR8: - return 4; - } - - UNREACHABLE(); - } + static u32 BytesPerPixel(PixelFormat format); VAddr address; u32 offset; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index 3ca350243..afd86a83a 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -4,18 +4,23 @@ #include <memory> #include "core/frontend/emu_window.h" +#include "core/settings.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_rasterizer.h" namespace VideoCore { -RendererBase::RendererBase(EmuWindow& window) : render_window{window} {} +RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} { + RefreshBaseSettings(); +} + RendererBase::~RendererBase() = default; -void RendererBase::UpdateCurrentFramebufferLayout() { - const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); +void RendererBase::RefreshBaseSettings() { + RefreshRasterizerSetting(); + UpdateCurrentFramebufferLayout(); - render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height); + renderer_settings.use_framelimiter = Settings::values.toggle_framelimit; } void RendererBase::RefreshRasterizerSetting() { @@ -24,4 +29,10 @@ void RendererBase::RefreshRasterizerSetting() { } } +void RendererBase::UpdateCurrentFramebufferLayout() { + const Layout::FramebufferLayout& layout = render_window.GetFramebufferLayout(); + + render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height); +} + } // namespace VideoCore diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 235de23a1..d9f16b8e6 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -4,23 +4,26 @@ #pragma once +#include <atomic> #include <memory> #include <boost/optional.hpp> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/gpu.h" #include "video_core/rasterizer_interface.h" +namespace Core::Frontend { class EmuWindow; +} namespace VideoCore { +struct RendererSettings { + std::atomic_bool use_framelimiter{false}; +}; + class RendererBase : NonCopyable { public: - /// Used to reference a framebuffer - enum kFramebuffer { kFramebuffer_VirtualXFB = 0, kFramebuffer_EFB, kFramebuffer_Texture }; - - explicit RendererBase(EmuWindow& window); + explicit RendererBase(Core::Frontend::EmuWindow& window); virtual ~RendererBase(); /// Swap buffers (render frame) @@ -32,9 +35,6 @@ public: /// Shutdown the renderer virtual void ShutDown() = 0; - /// Updates the framebuffer layout of the contained render window handle. - void UpdateCurrentFramebufferLayout(); - // Getter/setter functions: // ------------------------ @@ -54,13 +54,23 @@ public: return *rasterizer; } - void RefreshRasterizerSetting(); + /// Refreshes the settings common to all renderers + void RefreshBaseSettings(); protected: - EmuWindow& render_window; ///< Reference to the render window handle. + /// Refreshes settings specific to the rasterizer. + void RefreshRasterizerSetting(); + + Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<RasterizerInterface> rasterizer; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer + + RendererSettings renderer_settings; + +private: + /// Updates the framebuffer layout of the contained render window handle. + void UpdateCurrentFramebufferLayout(); }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c2a931469..52a649e2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); -RasterizerOpenGL::RasterizerOpenGL(EmuWindow& window) : emu_window{window} { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) + : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { // Create sampler objects for (size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); state.texture_units[i].sampler = texture_samplers[i].sampler.handle; } - // Create SSBOs - for (size_t stage = 0; stage < ssbos.size(); ++stage) { - for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) { - ssbos[stage][buffer].Create(); - state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle; - } - } - GLint ext_num; glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); for (GLint i = 0; i < ext_num; i++) { const std::string_view extension{ reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; - if (extension == "GL_ARB_buffer_storage") { - has_ARB_buffer_storage = true; - } else if (extension == "GL_ARB_direct_state_access") { + if (extension == "GL_ARB_direct_state_access") { has_ARB_direct_state_access = true; } else if (extension == "GL_ARB_separate_shader_objects") { has_ARB_separate_shader_objects = true; @@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(EmuWindow& window) : emu_window{window} { hw_vao.Create(); - stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); - stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); shader_program_manager = std::make_unique<GLShader::ProgramManager>(); state.draw.shader_program = 0; state.draw.vertex_array = hw_vao.handle; state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); - - for (unsigned index = 0; index < uniform_buffers.size(); ++index) { - auto& buffer = uniform_buffers[index]; - buffer.Create(); - glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle); - glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr, - GL_STREAM_COPY); - glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle); - } + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); glEnable(GL_BLEND); + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } -RasterizerOpenGL::~RasterizerOpenGL() { - if (stream_buffer != nullptr) { - state.draw.vertex_buffer = stream_buffer->GetHandle(); - state.Apply(); - stream_buffer->Release(); - } -} +RasterizerOpenGL::~RasterizerOpenGL() {} std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; state.draw.vertex_array = hw_vao.handle; - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); // Upload all guest vertex arrays sequentially to our buffer @@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, ASSERT(end > start); u64 size = end - start + 1; - // Copy vertex array data - Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size); + GLintptr vertex_buffer_offset; + std::tie(array_ptr, buffer_offset, vertex_buffer_offset) = + UploadMemory(array_ptr, buffer_offset, start, size); // Bind the vertex array to the buffer at the current offset. - glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); + glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, + vertex_array.stride); ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); - - array_ptr += size; - buffer_offset += size; } // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. @@ -161,11 +135,16 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, // assume every shader uses them all. for (unsigned index = 0; index < 16; ++index) { auto& attrib = regs.vertex_attrib_format[index]; - LOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", + + // Ignore invalid attributes. + if (!attrib.IsValid()) + continue; + + auto& buffer = regs.vertex_array[attrib.buffer]; + LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), attrib.offset.Value(), attrib.IsNormalized()); - auto& buffer = regs.vertex_array[attrib.buffer]; ASSERT(buffer.IsEnabled()); glEnableVertexAttribArray(index); @@ -196,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program return program_code; } -void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { - // Helper function for uploading uniform data - const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { - if (has_ARB_direct_state_access) { - glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); - } else { - glBindBuffer(GL_COPY_WRITE_BUFFER, handle); - glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); - } - }; - +std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. - u32 current_constbuffer_bindpoint = uniform_buffers.size(); + u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; u32 current_texture_bindpoint = 0; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -223,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { continue; } + std::tie(buffer_ptr, buffer_offset) = + AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment)); + const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu.state.shader_stages[stage]); std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); - // Flush the buffer so that the GPU can see the data we just wrote. - glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo)); + // Bind the buffer + glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, + sizeof(ubo)); - // Upload uniform data as one UBO per stage - const GLintptr ubo_offset = buffer_offset; - copy_buffer(uniform_buffers[stage].handle, ubo_offset, - sizeof(GLShader::MaxwellUniformData)); - - buffer_ptr += sizeof(GLShader::MaxwellUniformData); - buffer_offset += sizeof(GLShader::MaxwellUniformData); + buffer_ptr += sizeof(ubo); + buffer_offset += sizeof(ubo); GLShader::ShaderSetup setup{GetShaderProgramCode(program)}; GLShader::ShaderEntries shader_resources; @@ -277,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { static_cast<Maxwell::ShaderStage>(stage)); // Configure the const buffers for this shader stage. - current_constbuffer_bindpoint = - SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, - current_constbuffer_bindpoint, shader_resources.const_buffer_entries); + std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers( + buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, + current_constbuffer_bindpoint, shader_resources.const_buffer_entries); // Configure the textures for this shader stage. current_texture_bindpoint = @@ -294,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { } shader_program_manager->UseTrivialGeometryShader(); + + return {buffer_ptr, buffer_offset}; } size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -324,11 +294,14 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; + if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) { + LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured"); + using_color_fb = false; + } + // TODO(bunnei): Implement this const bool has_stencil = false; - const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; - const bool write_color_fb = state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; @@ -341,9 +314,10 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c Surface depth_surface; MathUtil::Rectangle<u32> surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = - res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb); - MathUtil::Rectangle<u32> draw_rect{ + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()}; + const MathUtil::Rectangle<u32> draw_rect{ static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left, surfaces_rect.left, surfaces_rect.right)), // Left static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top, @@ -423,6 +397,31 @@ void RasterizerOpenGL::Clear() { } } +std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, + size_t alignment) { + // Align the offset, not the mapped pointer + GLintptr offset_aligned = + static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment)); + return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned}; +} + +std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr, + GLintptr buffer_offset, + Tegra::GPUVAddr gpu_addr, + size_t size, size_t alignment) { + std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment); + GLintptr uploaded_offset = buffer_offset; + + const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; + const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)}; + Memory::ReadBlock(*cpu_addr, buffer_ptr, size); + + buffer_ptr += size; + buffer_offset += size; + + return {buffer_ptr, buffer_offset, uploaded_offset}; +} + void RasterizerOpenGL::DrawArrays() { if (accelerate_draw == AccelDraw::Disabled) return; @@ -447,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() { const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; - state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.draw.vertex_buffer = stream_buffer.GetHandle(); state.Apply(); size_t buffer_size = CalculateVertexArraysSize(); @@ -457,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() { } // Uniform space for the 5 shader stages - buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + - sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; + buffer_size = + Common::AlignUp<size_t>(buffer_size, 4) + + (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage; + + // Add space for at least 18 constant buffers + buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); u8* buffer_ptr; GLintptr buffer_offset; - std::tie(buffer_ptr, buffer_offset) = - stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); + std::tie(buffer_ptr, buffer_offset, std::ignore) = + stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4); + u8* buffer_ptr_base = buffer_ptr; - u8* offseted_buffer; - std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); - - offseted_buffer = - reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); - buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); + std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); // If indexed mode, copy the index buffer GLintptr index_buffer_offset = 0; if (is_indexed) { - const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager; - const boost::optional<VAddr> index_data_addr{ - memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())}; - Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size); - - index_buffer_offset = buffer_offset; - offseted_buffer += index_buffer_size; - buffer_offset += index_buffer_size; + std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory( + buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); } - offseted_buffer = - reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); - buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); - - SetupShaders(offseted_buffer, buffer_offset); + std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); - stream_buffer->Unmap(); + stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); shader_program_manager->ApplyTo(state); state.Apply(); @@ -638,32 +627,22 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr } } -u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program, - u32 current_bindpoint, - const std::vector<GLShader::ConstBufferEntry>& entries) { +std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( + u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program, + u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) { const auto& gpu = Core::System::GetInstance().GPU(); const auto& maxwell3d = gpu.Maxwell3D(); - // Reset all buffer draw state for this stage. - for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) { - buffer.bindpoint = 0; - buffer.enabled = false; - } - // Upload only the enabled buffers from the 16 constbuffers of each shader stage const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)]; for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& used_buffer = entries[bindpoint]; const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; - auto& buffer_draw_state = - state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()]; - - ASSERT_MSG(buffer.enabled, "Attempted to upload disabled constbuffer"); - buffer_draw_state.enabled = true; - buffer_draw_state.bindpoint = current_bindpoint + bindpoint; - boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); + if (!buffer.enabled) { + continue; + } size_t size = 0; @@ -686,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - std::vector<u8> data(size); - Memory::ReadBlock(*addr, data.data(), data.size()); + GLintptr const_buffer_offset; + std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = + UploadMemory(buffer_ptr, buffer_offset, buffer.address, size, + static_cast<size_t>(uniform_buffer_alignment)); - glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); - glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); - glBindBuffer(GL_UNIFORM_BUFFER, 0); + glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, + stream_buffer.GetHandle(), const_buffer_offset, size); // Now configure the bindpoint of the buffer inside the shader const std::string buffer_name = used_buffer.GetName(); const GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); if (index != GL_INVALID_INDEX) { - glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); + glUniformBlockBinding(program, index, current_bindpoint + bindpoint); } } state.Apply(); - return current_bindpoint + static_cast<u32>(entries.size()); + return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; } u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit, @@ -804,9 +784,7 @@ void RasterizerOpenGL::SyncClipCoef() { void RasterizerOpenGL::SyncCullMode() { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; - // TODO(bunnei): Enable the below once more things work - until then, this may hide regressions - // state.cull.enabled = regs.cull.enabled != 0; - state.cull.enabled = false; + state.cull.enabled = regs.cull.enabled != 0; if (state.cull.enabled) { state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6d6d85cc1..74307f626 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <tuple> #include <utility> #include <vector> #include <glad/glad.h> @@ -21,12 +22,15 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -class EmuWindow; struct ScreenInfo; +namespace Core::Frontend { +class EmuWindow; +} + class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(EmuWindow& renderer); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& renderer); ~RasterizerOpenGL() override; void DrawArrays() override; @@ -97,9 +101,10 @@ private: * @param entries Vector describing the buffers that are actually used in the guest shader. * @returns The next available bindpoint for use in the next shader stage. */ - u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program, - u32 current_bindpoint, - const std::vector<GLShader::ConstBufferEntry>& entries); + std::tuple<u8*, GLintptr, u32> SetupConstBuffers( + u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + GLuint program, u32 current_bindpoint, + const std::vector<GLShader::ConstBufferEntry>& entries); /* * Configures the current textures to use for the draw command. @@ -136,7 +141,6 @@ private: /// Syncs the blend state to match the guest state void SyncBlendState(); - bool has_ARB_buffer_storage = false; bool has_ARB_direct_state_access = false; bool has_ARB_separate_shader_objects = false; bool has_ARB_vertex_attrib_binding = false; @@ -145,29 +149,31 @@ private: RasterizerCacheOpenGL res_cache; - EmuWindow& emu_window; + Core::Frontend::EmuWindow& emu_window; std::unique_ptr<GLShader::ProgramManager> shader_program_manager; OGLVertexArray sw_vao; OGLVertexArray hw_vao; std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; - std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>, - Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> - ssbos; static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; - std::unique_ptr<OGLStreamBuffer> stream_buffer; + OGLStreamBuffer stream_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; + GLint uniform_buffer_alignment; size_t CalculateVertexArraysSize() const; std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); - std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; + std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); + + std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment); - void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); + std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset, + Tegra::GPUVAddr gpu_addr, size_t size, + size_t alignment = 4); enum class AccelDraw { Disabled, Arrays, Indexed }; AccelDraw accelerate_draw = AccelDraw::Disabled; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index c8f0c4e28..5d58ebd4f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -46,6 +46,8 @@ struct FormatTuple { params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); params.unaligned_height = config.tic.Height(); params.size_in_bytes = params.SizeInBytes(); + params.cache_width = Common::AlignUp(params.width, 16); + params.cache_height = Common::AlignUp(params.height, 16); return params; } @@ -63,6 +65,8 @@ struct FormatTuple { params.height = config.height; params.unaligned_height = config.height; params.size_in_bytes = params.SizeInBytes(); + params.cache_width = Common::AlignUp(params.width, 16); + params.cache_height = Common::AlignUp(params.height, 16); return params; } @@ -82,17 +86,23 @@ struct FormatTuple { params.height = zeta_height; params.unaligned_height = zeta_height; params.size_in_bytes = params.SizeInBytes(); + params.cache_width = Common::AlignUp(params.width, 16); + params.cache_height = Common::AlignUp(params.height, 16); return params; } static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U + {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, false}, // A2B10G10R10 {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5 {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U + {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI @@ -103,7 +113,10 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXT45 {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 - {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, + true}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8 @@ -113,6 +126,9 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16UNORM + {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI @@ -120,6 +136,10 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, @@ -174,69 +194,121 @@ MathUtil::Rectangle<u32> SurfaceParams::GetRect() const { return {0, actual_height, width, 0}; } +/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN +static bool IsFormatBCn(PixelFormat format) { + switch (format) { + case PixelFormat::DXT1: + case PixelFormat::DXT23: + case PixelFormat::DXT45: + case PixelFormat::DXN1: + case PixelFormat::DXN2SNORM: + case PixelFormat::DXN2UNORM: + case PixelFormat::BC7U: + return true; + } + return false; +} + template <bool morton_to_gl, PixelFormat format> -void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr addr) { +void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, + Tegra::GPUVAddr addr) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); const auto& gpu = Core::System::GetInstance().GPU(); if (morton_to_gl) { - if (SurfaceParams::GetFormatType(format) == SurfaceType::ColorTexture) { - auto data = Tegra::Texture::UnswizzleTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); - } else { - auto data = Tegra::Texture::UnswizzleDepthTexture( - *gpu.memory_manager->GpuToCpuAddress(addr), - SurfaceParams::DepthFormatFromPixelFormat(format), stride, height, block_height); - std::memcpy(gl_buffer, data.data(), data.size()); - } + // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual + // pixel values. + const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; + const std::vector<u8> data = + Tegra::Texture::UnswizzleTexture(*gpu.memory_manager->GpuToCpuAddress(addr), tile_size, + bytes_per_pixel, stride, height, block_height); + const size_t size_to_copy{std::min(gl_buffer.size(), data.size())}; + gl_buffer.assign(data.begin(), data.begin() + size_to_copy); } else { // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should // check the configuration for this and perform more generic un/swizzle LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128( stride, height, bytes_per_pixel, gl_bytes_per_pixel, - Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer, + Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(addr)), gl_buffer.data(), morton_to_gl); } } -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, - MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::RG16>, - MortonCopy<true, PixelFormat::RG16F>, MortonCopy<true, PixelFormat::RG16UI>, - MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>, - MortonCopy<true, PixelFormat::RGB32F>, MortonCopy<true, PixelFormat::SRGBA8>, - MortonCopy<true, PixelFormat::Z24S8>, MortonCopy<true, PixelFormat::S8Z24>, - MortonCopy<true, PixelFormat::Z32F>, MortonCopy<true, PixelFormat::Z16>, + // clang-format off + MortonCopy<true, PixelFormat::ABGR8U>, + MortonCopy<true, PixelFormat::ABGR8S>, + MortonCopy<true, PixelFormat::B5G6R5>, + MortonCopy<true, PixelFormat::A2B10G10R10>, + MortonCopy<true, PixelFormat::A1B5G5R5>, + MortonCopy<true, PixelFormat::R8>, + MortonCopy<true, PixelFormat::R8UI>, + MortonCopy<true, PixelFormat::RGBA16F>, + MortonCopy<true, PixelFormat::RGBA16U>, + MortonCopy<true, PixelFormat::RGBA16UI>, + MortonCopy<true, PixelFormat::R11FG11FB10F>, + MortonCopy<true, PixelFormat::RGBA32UI>, + MortonCopy<true, PixelFormat::DXT1>, + MortonCopy<true, PixelFormat::DXT23>, + MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXN1>, + MortonCopy<true, PixelFormat::DXN2UNORM>, + MortonCopy<true, PixelFormat::DXN2SNORM>, + MortonCopy<true, PixelFormat::BC7U>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4>, + MortonCopy<true, PixelFormat::G8R8>, + MortonCopy<true, PixelFormat::BGRA8>, + MortonCopy<true, PixelFormat::RGBA32F>, + MortonCopy<true, PixelFormat::RG32F>, + MortonCopy<true, PixelFormat::R32F>, + MortonCopy<true, PixelFormat::R16F>, + MortonCopy<true, PixelFormat::R16UNORM>, + MortonCopy<true, PixelFormat::R16S>, + MortonCopy<true, PixelFormat::R16UI>, + MortonCopy<true, PixelFormat::R16I>, + MortonCopy<true, PixelFormat::RG16>, + MortonCopy<true, PixelFormat::RG16F>, + MortonCopy<true, PixelFormat::RG16UI>, + MortonCopy<true, PixelFormat::RG16I>, + MortonCopy<true, PixelFormat::RG16S>, + MortonCopy<true, PixelFormat::RGB32F>, + MortonCopy<true, PixelFormat::SRGBA8>, + MortonCopy<true, PixelFormat::RG8U>, + MortonCopy<true, PixelFormat::RG8S>, + MortonCopy<true, PixelFormat::RG32UI>, + MortonCopy<true, PixelFormat::R32UI>, + MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, + MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>, + // clang-format on }; -static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), +static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::ABGR8>, + // clang-format off + MortonCopy<false, PixelFormat::ABGR8U>, + MortonCopy<false, PixelFormat::ABGR8S>, MortonCopy<false, PixelFormat::B5G6R5>, MortonCopy<false, PixelFormat::A2B10G10R10>, MortonCopy<false, PixelFormat::A1B5G5R5>, MortonCopy<false, PixelFormat::R8>, + MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::RGBA16U>, + MortonCopy<false, PixelFormat::RGBA16UI>, MortonCopy<false, PixelFormat::R11FG11FB10F>, MortonCopy<false, PixelFormat::RGBA32UI>, - // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/BC7U/ASTC_2D_4X4 formats is not supported + // TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/ASTC_2D_4X4 formats is not + // supported + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -250,6 +322,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<false, PixelFormat::R32F>, MortonCopy<false, PixelFormat::R16F>, MortonCopy<false, PixelFormat::R16UNORM>, + MortonCopy<false, PixelFormat::R16S>, + MortonCopy<false, PixelFormat::R16UI>, + MortonCopy<false, PixelFormat::R16I>, MortonCopy<false, PixelFormat::RG16>, MortonCopy<false, PixelFormat::RG16F>, MortonCopy<false, PixelFormat::RG16UI>, @@ -257,11 +332,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr), MortonCopy<false, PixelFormat::RG16S>, MortonCopy<false, PixelFormat::RGB32F>, MortonCopy<false, PixelFormat::SRGBA8>, + MortonCopy<false, PixelFormat::RG8U>, + MortonCopy<false, PixelFormat::RG8S>, + MortonCopy<false, PixelFormat::RG32UI>, + MortonCopy<false, PixelFormat::R32UI>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, MortonCopy<false, PixelFormat::Z16>, MortonCopy<false, PixelFormat::Z32FS8>, + // clang-format on }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -441,22 +521,24 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 void CachedSurface::LoadGLBuffer() { ASSERT(params.type != SurfaceType::Fill); - u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); + const u8* const texture_src_data = Memory::GetPointer(params.GetCpuAddr()); ASSERT(texture_src_data); - gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format)); + const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); + const u32 copy_size = params.width * params.height * bytes_per_pixel; MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - if (!params.is_tiled) { - const u32 bytes_per_pixel{params.GetFormatBpp() >> 3}; + if (params.is_tiled) { + gl_buffer.resize(copy_size); - std::memcpy(gl_buffer.data(), texture_src_data, - bytes_per_pixel * params.width * params.height); - } else { morton_to_gl_fns[static_cast<size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + params.width, params.block_height, params.height, gl_buffer, params.addr); + } else { + const u8* const texture_src_data_end = texture_src_data + copy_size; + + gl_buffer.assign(texture_src_data, texture_src_data_end); } ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); @@ -479,7 +561,7 @@ void CachedSurface::FlushGLBuffer() { std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes); } else { gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( - params.width, params.block_height, params.height, gl_buffer.data(), params.addr); + params.width, params.block_height, params.height, gl_buffer, params.addr); } } @@ -594,8 +676,8 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu return GetSurface(SurfaceParams::CreateForTexture(config)); } -SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { +SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb, + bool using_depth_fb) { const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs; // TODO(bunnei): This is hard corded to use just the first render buffer @@ -680,12 +762,12 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params) { // If use_accurate_framebuffers is enabled, always load from memory FlushSurface(surface); UnregisterSurface(surface); - } else if (surface->GetSurfaceParams() != params) { - // If surface parameters changed, recreate the surface from the old one - return RecreateSurface(surface, params); - } else { + } else if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { // Use the cached surface as-is return surface; + } else { + // If surface parameters changed, recreate the surface from the old one + return RecreateSurface(surface, params); } } @@ -751,10 +833,12 @@ void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr /*addr*/, size_t /*size* } void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, size_t size) { - for (const auto& pair : surface_cache) { - const auto& surface{pair.second}; + for (auto iter = surface_cache.cbegin(); iter != surface_cache.cend();) { + const auto& surface{iter->second}; const auto& params{surface->GetSurfaceParams()}; + ++iter; + if (params.IsOverlappingRegion(addr, size)) { UnregisterSurface(surface); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 4e1e18d9c..36a41522b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -9,6 +9,7 @@ #include <memory> #include <vector> #include <boost/icl/interval_map.hpp> + #include "common/common_types.h" #include "common/math_util.h" #include "video_core/engines/maxwell_3d.h" @@ -22,43 +23,56 @@ using PageMap = boost::icl::interval_map<u64, int>; struct SurfaceParams { enum class PixelFormat { - ABGR8 = 0, - B5G6R5 = 1, - A2B10G10R10 = 2, - A1B5G5R5 = 3, - R8 = 4, - RGBA16F = 5, - R11FG11FB10F = 6, - RGBA32UI = 7, - DXT1 = 8, - DXT23 = 9, - DXT45 = 10, - DXN1 = 11, // This is also known as BC4 - BC7U = 12, - ASTC_2D_4X4 = 13, - G8R8 = 14, - BGRA8 = 15, - RGBA32F = 16, - RG32F = 17, - R32F = 18, - R16F = 19, - R16UNORM = 20, - RG16 = 21, - RG16F = 22, - RG16UI = 23, - RG16I = 24, - RG16S = 25, - RGB32F = 26, - SRGBA8 = 27, + ABGR8U = 0, + ABGR8S = 1, + B5G6R5 = 2, + A2B10G10R10 = 3, + A1B5G5R5 = 4, + R8 = 5, + R8UI = 6, + RGBA16F = 7, + RGBA16U = 8, + RGBA16UI = 9, + R11FG11FB10F = 10, + RGBA32UI = 11, + DXT1 = 12, + DXT23 = 13, + DXT45 = 14, + DXN1 = 15, // This is also known as BC4 + DXN2UNORM = 16, + DXN2SNORM = 17, + BC7U = 18, + ASTC_2D_4X4 = 19, + G8R8 = 20, + BGRA8 = 21, + RGBA32F = 22, + RG32F = 23, + R32F = 24, + R16F = 25, + R16UNORM = 26, + R16S = 27, + R16UI = 28, + R16I = 29, + RG16 = 30, + RG16F = 31, + RG16UI = 32, + RG16I = 33, + RG16S = 34, + RGB32F = 35, + SRGBA8 = 36, + RG8U = 37, + RG8S = 38, + RG32UI = 39, + R32UI = 40, MaxColorFormat, // DepthStencil formats - Z24S8 = 28, - S8Z24 = 29, - Z32F = 30, - Z16 = 31, - Z32FS8 = 32, + Z24S8 = 41, + S8Z24 = 42, + Z32F = 43, + Z16 = 44, + Z32FS8 = 45, MaxDepthStencilFormat, @@ -96,18 +110,24 @@ struct SurfaceParams { return 0; constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ - 1, // ABGR8 + 1, // ABGR8U + 1, // ABGR8S 1, // B5G6R5 1, // A2B10G10R10 1, // A1B5G5R5 1, // R8 + 1, // R8UI 1, // RGBA16F + 1, // RGBA16U + 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI 4, // DXT1 4, // DXT23 4, // DXT45 4, // DXN1 + 4, // DXN2UNORM + 4, // DXN2SNORM 4, // BC7U 4, // ASTC_2D_4X4 1, // G8R8 @@ -117,6 +137,9 @@ struct SurfaceParams { 1, // R32F 1, // R16F 1, // R16UNORM + 1, // R16S + 1, // R16UI + 1, // R16I 1, // RG16 1, // RG16F 1, // RG16UI @@ -124,6 +147,10 @@ struct SurfaceParams { 1, // RG16S 1, // RGB32F 1, // SRGBA8 + 1, // RG8U + 1, // RG8S + 1, // RG32UI + 1, // R32UI 1, // Z24S8 1, // S8Z24 1, // Z32F @@ -140,18 +167,24 @@ struct SurfaceParams { return 0; constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8 + 32, // ABGR8U + 32, // ABGR8S 16, // B5G6R5 32, // A2B10G10R10 16, // A1B5G5R5 8, // R8 + 8, // R8UI 64, // RGBA16F + 64, // RGBA16U + 64, // RGBA16UI 32, // R11FG11FB10F 128, // RGBA32UI 64, // DXT1 128, // DXT23 128, // DXT45 64, // DXN1 + 128, // DXN2UNORM + 128, // DXN2SNORM 128, // BC7U 32, // ASTC_2D_4X4 16, // G8R8 @@ -161,6 +194,9 @@ struct SurfaceParams { 32, // R32F 16, // R16F 16, // R16UNORM + 16, // R16S + 16, // R16UI + 16, // R16I 32, // RG16 32, // RG16F 32, // RG16UI @@ -168,6 +204,10 @@ struct SurfaceParams { 32, // RG16S 96, // RGB32F 32, // SRGBA8 + 16, // RG8U + 16, // RG8S + 64, // RG32UI + 32, // R32UI 32, // Z24S8 32, // S8Z24 32, // Z32F @@ -203,26 +243,37 @@ struct SurfaceParams { static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { switch (format) { + // TODO (Hexagon12): Converting SRGBA to RGBA is a hack and doesn't completely correct the + // gamma. case Tegra::RenderTargetFormat::RGBA8_SRGB: - return PixelFormat::SRGBA8; case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8; + return PixelFormat::ABGR8U; + case Tegra::RenderTargetFormat::RGBA8_SNORM: + return PixelFormat::ABGR8S; case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: return PixelFormat::A2B10G10R10; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; + case Tegra::RenderTargetFormat::RGBA16_UNORM: + return PixelFormat::RGBA16U; + case Tegra::RenderTargetFormat::RGBA16_UINT: + return PixelFormat::RGBA16UI; case Tegra::RenderTargetFormat::RGBA32_FLOAT: return PixelFormat::RGBA32F; case Tegra::RenderTargetFormat::RG32_FLOAT: return PixelFormat::RG32F; case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; + case Tegra::RenderTargetFormat::B5G6R5_UNORM: + return PixelFormat::B5G6R5; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: return PixelFormat::R8; + case Tegra::RenderTargetFormat::R8_UINT: + return PixelFormat::R8UI; case Tegra::RenderTargetFormat::RG16_FLOAT: return PixelFormat::RG16F; case Tegra::RenderTargetFormat::RG16_UINT: @@ -233,10 +284,26 @@ struct SurfaceParams { return PixelFormat::RG16; case Tegra::RenderTargetFormat::RG16_SNORM: return PixelFormat::RG16S; + case Tegra::RenderTargetFormat::RG8_UNORM: + return PixelFormat::RG8U; + case Tegra::RenderTargetFormat::RG8_SNORM: + return PixelFormat::RG8S; case Tegra::RenderTargetFormat::R16_FLOAT: return PixelFormat::R16F; + case Tegra::RenderTargetFormat::R16_UNORM: + return PixelFormat::R16UNORM; + case Tegra::RenderTargetFormat::R16_SNORM: + return PixelFormat::R16S; + case Tegra::RenderTargetFormat::R16_UINT: + return PixelFormat::R16UI; + case Tegra::RenderTargetFormat::R16_SINT: + return PixelFormat::R16I; case Tegra::RenderTargetFormat::R32_FLOAT: return PixelFormat::R32F; + case Tegra::RenderTargetFormat::R32_UINT: + return PixelFormat::R32UI; + case Tegra::RenderTargetFormat::RG32_UINT: + return PixelFormat::RG32UI; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -248,7 +315,15 @@ struct SurfaceParams { // TODO(Subv): Properly implement this switch (format) { case Tegra::Texture::TextureFormat::A8R8G8B8: - return PixelFormat::ABGR8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::ABGR8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::ABGR8S; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::B5G6R5: return PixelFormat::B5G6R5; case Tegra::Texture::TextureFormat::A2B10G10R10: @@ -256,7 +331,15 @@ struct SurfaceParams { case Tegra::Texture::TextureFormat::A1B5G5R5: return PixelFormat::A1B5G5R5; case Tegra::Texture::TextureFormat::R8: - return PixelFormat::R8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::R8; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R8UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::G8R8: return PixelFormat::G8R8; case Tegra::Texture::TextureFormat::R16_G16_B16_A16: @@ -274,7 +357,15 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32: - return PixelFormat::RG32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RG32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::RG32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32: return PixelFormat::RGB32F; case Tegra::Texture::TextureFormat::R16: @@ -283,12 +374,26 @@ struct SurfaceParams { return PixelFormat::R16F; case Tegra::Texture::ComponentType::UNORM: return PixelFormat::R16UNORM; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::R16S; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R16UI; + case Tegra::Texture::ComponentType::SINT: + return PixelFormat::R16I; } LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32: - return PixelFormat::R32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R32F; + case Tegra::Texture::ComponentType::UINT: + return PixelFormat::R32UI; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::ZF32: return PixelFormat::Z32F; case Tegra::Texture::TextureFormat::Z24S8: @@ -301,6 +406,16 @@ struct SurfaceParams { return PixelFormat::DXT45; case Tegra::Texture::TextureFormat::DXN1: return PixelFormat::DXN1; + case Tegra::Texture::TextureFormat::DXN2: + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::DXN2UNORM; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::DXN2SNORM; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::BC7U: return PixelFormat::BC7U; case Tegra::Texture::TextureFormat::ASTC_2D_4X4: @@ -328,89 +443,6 @@ struct SurfaceParams { } } - static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) { - // TODO(Subv): Properly implement this - switch (format) { - case PixelFormat::ABGR8: - case PixelFormat::SRGBA8: - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::B5G6R5: - return Tegra::Texture::TextureFormat::B5G6R5; - case PixelFormat::A2B10G10R10: - return Tegra::Texture::TextureFormat::A2B10G10R10; - case PixelFormat::A1B5G5R5: - return Tegra::Texture::TextureFormat::A1B5G5R5; - case PixelFormat::R8: - return Tegra::Texture::TextureFormat::R8; - case PixelFormat::G8R8: - return Tegra::Texture::TextureFormat::G8R8; - case PixelFormat::RGBA16F: - return Tegra::Texture::TextureFormat::R16_G16_B16_A16; - case PixelFormat::R11FG11FB10F: - return Tegra::Texture::TextureFormat::BF10GF11RF11; - case PixelFormat::RGBA32UI: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::DXT1: - return Tegra::Texture::TextureFormat::DXT1; - case PixelFormat::DXT23: - return Tegra::Texture::TextureFormat::DXT23; - case PixelFormat::DXT45: - return Tegra::Texture::TextureFormat::DXT45; - case PixelFormat::DXN1: - return Tegra::Texture::TextureFormat::DXN1; - case PixelFormat::BC7U: - return Tegra::Texture::TextureFormat::BC7U; - case PixelFormat::ASTC_2D_4X4: - return Tegra::Texture::TextureFormat::ASTC_2D_4X4; - case PixelFormat::BGRA8: - // TODO(bunnei): This is fine for unswizzling (since we just need the right component - // sizes), but could be a bug if we used this function in different ways. - return Tegra::Texture::TextureFormat::A8R8G8B8; - case PixelFormat::RGBA32F: - return Tegra::Texture::TextureFormat::R32_G32_B32_A32; - case PixelFormat::RGB32F: - return Tegra::Texture::TextureFormat::R32_G32_B32; - case PixelFormat::RG32F: - return Tegra::Texture::TextureFormat::R32_G32; - case PixelFormat::R32F: - return Tegra::Texture::TextureFormat::R32; - case PixelFormat::R16F: - case PixelFormat::R16UNORM: - return Tegra::Texture::TextureFormat::R16; - case PixelFormat::Z32F: - return Tegra::Texture::TextureFormat::ZF32; - case PixelFormat::Z24S8: - return Tegra::Texture::TextureFormat::Z24S8; - case PixelFormat::RG16F: - case PixelFormat::RG16: - case PixelFormat::RG16UI: - case PixelFormat::RG16I: - case PixelFormat::RG16S: - return Tegra::Texture::TextureFormat::R16_G16; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - - static Tegra::DepthFormat DepthFormatFromPixelFormat(PixelFormat format) { - switch (format) { - case PixelFormat::S8Z24: - return Tegra::DepthFormat::S8_Z24_UNORM; - case PixelFormat::Z24S8: - return Tegra::DepthFormat::Z24_S8_UNORM; - case PixelFormat::Z32F: - return Tegra::DepthFormat::Z32_FLOAT; - case PixelFormat::Z16: - return Tegra::DepthFormat::Z16_UNORM; - case PixelFormat::Z32FS8: - return Tegra::DepthFormat::Z32_S8_X24_FLOAT; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); - UNREACHABLE(); - } - } - static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) { // TODO(Subv): Implement more component types switch (type) { @@ -439,8 +471,15 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::RGB10_A2_UNORM: case Tegra::RenderTargetFormat::R8_UNORM: case Tegra::RenderTargetFormat::RG16_UNORM: + case Tegra::RenderTargetFormat::R16_UNORM: + case Tegra::RenderTargetFormat::B5G6R5_UNORM: + case Tegra::RenderTargetFormat::RG8_UNORM: + case Tegra::RenderTargetFormat::RGBA16_UNORM: return ComponentType::UNorm; + case Tegra::RenderTargetFormat::RGBA8_SNORM: case Tegra::RenderTargetFormat::RG16_SNORM: + case Tegra::RenderTargetFormat::R16_SNORM: + case Tegra::RenderTargetFormat::RG8_SNORM: return ComponentType::SNorm; case Tegra::RenderTargetFormat::RGBA16_FLOAT: case Tegra::RenderTargetFormat::R11G11B10_FLOAT: @@ -451,9 +490,15 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R32_FLOAT: return ComponentType::Float; case Tegra::RenderTargetFormat::RGBA32_UINT: + case Tegra::RenderTargetFormat::RGBA16_UINT: case Tegra::RenderTargetFormat::RG16_UINT: + case Tegra::RenderTargetFormat::R8_UINT: + case Tegra::RenderTargetFormat::R16_UINT: + case Tegra::RenderTargetFormat::RG32_UINT: + case Tegra::RenderTargetFormat::R32_UINT: return ComponentType::UInt; case Tegra::RenderTargetFormat::RG16_SINT: + case Tegra::RenderTargetFormat::R16_SINT: return ComponentType::SInt; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); @@ -464,7 +509,7 @@ struct SurfaceParams { static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; + return PixelFormat::ABGR8U; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -546,6 +591,12 @@ struct SurfaceParams { return !operator==(other); } + /// Checks if surfaces are compatible for caching + bool IsCompatibleSurface(const SurfaceParams& other) const { + return std::tie(pixel_format, type, cache_width, cache_height) == + std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height); + } + Tegra::GPUVAddr addr; bool is_tiled; u32 block_height; @@ -556,6 +607,10 @@ struct SurfaceParams { u32 height; u32 unaligned_height; size_t size_in_bytes; + + // Parameters used for caching only + u32 cache_width; + u32 cache_height; }; class CachedSurface final { @@ -600,8 +655,7 @@ public: Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config); /// Get the color and depth surfaces based on the framebuffer configuration - SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle<s32>& viewport); + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb); /// Flushes the surface to Switch memory void FlushSurface(const Surface& surface); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e3217db81..6834d7085 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -141,6 +141,15 @@ private: ExitMethod jmp = Scan(target, end, labels); return exit_method = ParallelExit(no_jmp, jmp); } + case OpCode::Id::SSY: { + // The SSY instruction uses a similar encoding as the BRA instruction. + ASSERT_MSG(instr.bra.constant_buffer == 0, + "Constant buffer SSY is not supported"); + u32 target = offset + instr.bra.GetBranchTarget(); + labels.insert(target); + // Continue scanning for an exit method. + break; + } } } } @@ -347,9 +356,14 @@ public: * @param reg The register to use as the source value. */ void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) { - std::string dest = GetOutputAttribute(attribute) + GetSwizzle(elem); + std::string dest = GetOutputAttribute(attribute); std::string src = GetRegisterAsFloat(reg); - shader.AddLine(dest + " = " + src + ';'); + + if (!dest.empty()) { + // Can happen with unknown/unimplemented output attributes, in which case we ignore the + // instruction for now. + shader.AddLine(dest + GetSwizzle(elem) + " = " + src + ';'); + } } /// Generates code representing a uniform (C buffer) register, interpreted as the input type. @@ -362,6 +376,8 @@ public: return value; } else if (type == GLSLRegister::Type::Integer) { return "floatBitsToInt(" + value + ')'; + } else if (type == GLSLRegister::Type::UnsignedInteger) { + return "floatBitsToUint(" + value + ')'; } else { UNREACHABLE(); } @@ -507,6 +523,8 @@ private: /// Build the GLSL register list. void BuildRegisterList() { + regs.reserve(Register::NumRegisters); + for (size_t index = 0; index < Register::NumRegisters; ++index) { regs.emplace_back(index, suffix); } @@ -526,14 +544,17 @@ private: default: const u32 index{static_cast<u32>(attribute) - static_cast<u32>(Attribute::Index::Attribute_0)}; - if (attribute >= Attribute::Index::Attribute_0) { + if (attribute >= Attribute::Index::Attribute_0 && + attribute <= Attribute::Index::Attribute_31) { declr_input_attribute.insert(attribute); return "input_attribute_" + std::to_string(index); } - LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index); + LOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", static_cast<u32>(attribute)); UNREACHABLE(); } + + return "vec4(0, 0, 0, 0)"; } /// Generates code representing an output attribute register. @@ -551,6 +572,7 @@ private: LOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index); UNREACHABLE(); + return {}; } } @@ -602,12 +624,12 @@ private: /// Generates code representing a 19-bit immediate value static std::string GetImmediate19(const Instruction& instr) { - return std::to_string(instr.alu.GetImm20_19()); + return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_19()); } /// Generates code representing a 32-bit immediate value static std::string GetImmediate32(const Instruction& instr) { - return std::to_string(instr.alu.GetImm20_32()); + return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); } /// Generates code representing a texture sampler. @@ -650,16 +672,17 @@ private: * @param instr Instruction to generate the if condition for. * @returns string containing the predicate condition. */ - std::string GetPredicateCondition(u64 index, bool negate) const { + std::string GetPredicateCondition(u64 index, bool negate) { using Tegra::Shader::Pred; std::string variable; // Index 7 is used as an 'Always True' condition. - if (index == static_cast<u64>(Pred::UnusedIndex)) + if (index == static_cast<u64>(Pred::UnusedIndex)) { variable = "true"; - else + } else { variable = 'p' + std::to_string(index) + '_' + suffix; - + declr_predicates.insert(variable); + } if (negate) { return "!(" + variable + ')'; } @@ -818,7 +841,11 @@ private: ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, "NeverExecute predicate not implemented"); - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + // Some instructions (like SSY) don't have a predicate field, they are always + // unconditionally executed. + bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId()); + + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { shader.AddLine("if (" + GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + ')'); @@ -1605,6 +1632,99 @@ private: } break; } + case OpCode::Type::Xmad: { + ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); + ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented"); + + std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)}; + std::string op_b; + std::string op_c; + + // TODO(bunnei): Needs to be fixed once op_a or op_b is signed + ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented"); + const bool is_signed{instr.xmad.sign_a == 1}; + + bool is_merge{}; + switch (opcode->GetId()) { + case OpCode::Id::XMAD_CR: { + is_merge = instr.xmad.merge_56; + op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + instr.xmad.sign_b ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + case OpCode::Id::XMAD_RR: { + is_merge = instr.xmad.merge_37; + op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + case OpCode::Id::XMAD_RC: { + op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b); + op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + is_signed ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger); + break; + } + case OpCode::Id::XMAD_IMM: { + is_merge = instr.xmad.merge_37; + op_b += std::to_string(instr.xmad.imm20_16); + op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed); + break; + } + default: { + LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName()); + UNREACHABLE(); + } + } + + // TODO(bunnei): Ensure this is right with signed operands + if (instr.xmad.high_a) { + op_a = "((" + op_a + ") >> 16)"; + } else { + op_a = "((" + op_a + ") & 0xFFFF)"; + } + + std::string src2 = '(' + op_b + ')'; // Preserve original source 2 + if (instr.xmad.high_b) { + op_b = '(' + src2 + " >> 16)"; + } else { + op_b = '(' + src2 + " & 0xFFFF)"; + } + + std::string product = '(' + op_a + " * " + op_b + ')'; + if (instr.xmad.product_shift_left) { + product = '(' + product + " << 16)"; + } + + switch (instr.xmad.mode) { + case Tegra::Shader::XmadMode::None: + break; + case Tegra::Shader::XmadMode::CLo: + op_c = "((" + op_c + ") & 0xFFFF)"; + break; + case Tegra::Shader::XmadMode::CHi: + op_c = "((" + op_c + ") >> 16)"; + break; + case Tegra::Shader::XmadMode::CBcc: + op_c = "((" + op_c + ") + (" + src2 + "<< 16))"; + break; + default: { + LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}", + static_cast<u32>(instr.xmad.mode.Value())); + UNREACHABLE(); + } + } + + std::string sum{'(' + product + " + " + op_c + ')'}; + if (is_merge) { + sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))"; + } + + regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1); + break; + } default: { switch (opcode->GetId()) { case OpCode::Id::EXIT: { @@ -1642,7 +1762,15 @@ private: } case OpCode::Id::KIL: { ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + + // Enclose "discard" in a conditional, so that GLSL compilation does not complain + // about unexecuted instructions that may follow this. + shader.AddLine("if (true) {"); + ++shader.scope; shader.AddLine("discard;"); + --shader.scope; + shader.AddLine("}"); + break; } case OpCode::Id::BRA: { @@ -1658,16 +1786,25 @@ private: break; } case OpCode::Id::SSY: { - // The SSY opcode tells the GPU where to re-converge divergent execution paths, we - // can ignore this when generating GLSL code. + // The SSY opcode tells the GPU where to re-converge divergent execution paths, it + // sets the target of the jump that the SYNC instruction will make. The SSY opcode + // has a similar structure to the BRA opcode. + ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); + + u32 target = offset + instr.bra.GetBranchTarget(); + shader.AddLine("ssy_target = " + std::to_string(target) + "u;"); break; } - case OpCode::Id::SYNC: + case OpCode::Id::SYNC: { + // The SYNC opcode jumps to the address previously set by the SSY opcode ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + shader.AddLine("{ jmp_to = ssy_target; break; }"); + break; + } case OpCode::Id::DEPBAR: { - // TODO(Subv): Find out if we actually have to care about these instructions or if + // TODO(Subv): Find out if we actually have to care about this instruction or if // the GLSL compiler takes care of that for us. - LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); + LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: { @@ -1681,7 +1818,7 @@ private: } // Close the predicate condition scope. - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { --shader.scope; shader.AddLine('}'); } @@ -1732,6 +1869,7 @@ private: } else { labels.insert(subroutine.begin); shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("uint ssy_target = 0u;"); shader.AddLine("while (true) {"); ++shader.scope; @@ -1747,7 +1885,7 @@ private: u32 compile_end = CompileRange(label, next_label); if (compile_end > next_label && compile_end != PROGRAM_END) { // This happens only when there is a label inside a IF/LOOP block - shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); labels.emplace(compile_end); } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 68bacd4c5..1d1975179 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -203,21 +203,6 @@ void OpenGLState::Apply() const { } } - // Constbuffers - for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) { - for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) { - const auto& current = cur_state.draw.const_buffers[stage][buffer_id]; - const auto& new_state = draw.const_buffers[stage][buffer_id]; - - if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || - current.ssbo != new_state.ssbo) { - if (new_state.enabled) { - glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); - } - } - } - } - // Framebuffer if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 24b1d956b..bdb02ba25 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -7,6 +7,10 @@ #include <array> #include <glad/glad.h> +#include "video_core/engines/maxwell_3d.h" + +using Regs = Tegra::Engines::Maxwell3D::Regs; + namespace TextureUnits { struct TextureUnit { @@ -115,12 +119,6 @@ public: GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING - struct ConstBufferConfig { - bool enabled = false; - GLuint bindpoint; - GLuint ssbo; - }; - std::array<std::array<ConstBufferConfig, 16>, 5> const_buffers{}; } draw; struct { diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index a2713e9f0..03a8ed8b7 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -9,174 +9,91 @@ #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -class OrphanBuffer : public OGLStreamBuffer { -public: - explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~OrphanBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; - void Unmap() override; - - std::vector<u8> data; -}; - -class StorageBuffer : public OGLStreamBuffer { -public: - explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} - ~StorageBuffer() override; - -private: - void Create(size_t size, size_t sync_subdivide) override; - void Release() override; - - std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; - void Unmap() override; - - struct Fence { - OGLSync sync; - size_t offset; - }; - std::deque<Fence> head; - std::deque<Fence> tail; - - u8* mapped_ptr; -}; - -OGLStreamBuffer::OGLStreamBuffer(GLenum target) { - gl_target = target; -} - -GLuint OGLStreamBuffer::GetHandle() const { - return gl_buffer.handle; -} +OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent) + : gl_target(target), buffer_size(size) { + gl_buffer.Create(); + glBindBuffer(gl_target, gl_buffer.handle); -std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { - if (storage_buffer) { - return std::make_unique<StorageBuffer>(target); + GLsizeiptr allocate_size = size; + if (target == GL_ARRAY_BUFFER) { + // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer + // read position is near the end and is an out-of-bound access to the vertex buffer. This is + // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the + // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the + // crash. + allocate_size *= 2; } - return std::make_unique<OrphanBuffer>(target); -} -OrphanBuffer::~OrphanBuffer() { - Release(); + if (GLAD_GL_ARB_buffer_storage) { + persistent = true; + coherent = prefer_coherent; + GLbitfield flags = + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + glBufferStorage(gl_target, allocate_size, nullptr, flags); + mapped_ptr = static_cast<u8*>(glMapBufferRange( + gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); + } else { + glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); + } } -void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { - buffer_pos = 0; - buffer_size = size; - data.resize(buffer_size); - - if (gl_buffer.handle == 0) { - gl_buffer.Create(); +OGLStreamBuffer::~OGLStreamBuffer() { + if (persistent) { glBindBuffer(gl_target, gl_buffer.handle); + glUnmapBuffer(gl_target); } - - glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); -} - -void OrphanBuffer::Release() { gl_buffer.Release(); } -std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); - - if (buffer_pos + size > buffer_size) { - Create(std::max(buffer_size, size), 0); - } - - mapped_size = size; - return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} - -void OrphanBuffer::Unmap() { - glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), - static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); - buffer_pos += mapped_size; -} - -StorageBuffer::~StorageBuffer() { - Release(); +GLuint OGLStreamBuffer::GetHandle() const { + return gl_buffer.handle; } -void StorageBuffer::Create(size_t size, size_t sync_subdivide) { - if (gl_buffer.handle != 0) - return; - - buffer_pos = 0; - buffer_size = size; - buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); - - gl_buffer.Create(); - glBindBuffer(gl_target, gl_buffer.handle); - - glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); - mapped_ptr = reinterpret_cast<u8*>( - glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); +GLsizeiptr OGLStreamBuffer::GetSize() const { + return buffer_size; } -void StorageBuffer::Release() { - if (gl_buffer.handle == 0) - return; - - glUnmapBuffer(gl_target); - - gl_buffer.Release(); - head.clear(); - tail.clear(); -} - -std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { +std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { ASSERT(size <= buffer_size); + ASSERT(alignment <= buffer_size); + mapped_size = size; - OGLSync sync; - - buffer_pos = Common::AlignUp(buffer_pos, alignment); - size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); - - if (!head.empty() && - (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { - ASSERT(head.back().sync.handle == 0); - head.back().sync.Create(); + if (alignment > 0) { + buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment); } + bool invalidate = false; if (buffer_pos + size > buffer_size) { - if (!tail.empty()) { - std::swap(sync, tail.back().sync); - tail.clear(); - } - std::swap(tail, head); buffer_pos = 0; - effective_offset = 0; - } + invalidate = true; - while (!tail.empty() && buffer_pos + size > tail.front().offset) { - std::swap(sync, tail.front().sync); - tail.pop_front(); + if (persistent) { + glUnmapBuffer(gl_target); + } } - if (sync.handle != 0) { - glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - sync.Release(); + if (invalidate | !persistent) { + GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | + (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | + (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); + mapped_ptr = static_cast<u8*>( + glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); + mapped_offset = buffer_pos; } - if (head.empty() || effective_offset > head.back().offset) { - head.emplace_back(); - head.back().offset = effective_offset; + return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); +} + +void OGLStreamBuffer::Unmap(GLsizeiptr size) { + ASSERT(size <= mapped_size); + + if (!coherent && size > 0) { + glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); } - mapped_size = size; - return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); -} + if (!persistent) { + glUnmapBuffer(gl_target); + } -void StorageBuffer::Unmap() { - glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), - static_cast<GLsizeiptr>(mapped_size)); - buffer_pos += mapped_size; + buffer_pos += size; } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index e78dc5784..45592daaf 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -2,35 +2,41 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#pragma once - -#include <memory> +#include <tuple> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLenum target); - virtual ~OGLStreamBuffer() = default; - -public: - static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); - - virtual void Create(size_t size, size_t sync_subdivide) = 0; - virtual void Release() {} + explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false); + ~OGLStreamBuffer(); GLuint GetHandle() const; + GLsizeiptr GetSize() const; + + /* + * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes + * and the optional alignment requirement. + * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. + * The return values are the pointer to the new chunk, the offset within the buffer, + * and the invalidation flag for previous chunks. + * The actual used size must be specified on unmapping the chunk. + */ + std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0); - virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; - virtual void Unmap() = 0; + void Unmap(GLsizeiptr size); -protected: +private: OGLBuffer gl_buffer; GLenum gl_target; - size_t buffer_pos = 0; - size_t buffer_size = 0; - size_t buffer_sync_subdivide = 0; - size_t mapped_size = 0; + bool coherent = false; + bool persistent = false; + + GLintptr buffer_pos = 0; + GLsizeiptr buffer_size = 0; + GLintptr mapped_offset = 0; + GLsizeiptr mapped_size = 0; + u8* mapped_ptr = nullptr; }; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 16b1bd606..83ea0cfc0 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -27,9 +27,12 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; @@ -43,6 +46,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; case Maxwell::VertexAttribute::Size::Size_16_16: @@ -84,6 +90,10 @@ inline GLenum IndexFormat(Maxwell::IndexFormat index_format) { inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return GL_POINTS; + case Maxwell::PrimitiveTopology::LineStrip: + return GL_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: return GL_TRIANGLES; case Maxwell::PrimitiveTopology::TriangleStrip: @@ -149,42 +159,61 @@ inline GLenum BlendEquation(Maxwell::Blend::Equation equation) { inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { switch (factor) { case Maxwell::Blend::Factor::Zero: + case Maxwell::Blend::Factor::ZeroGL: return GL_ZERO; case Maxwell::Blend::Factor::One: + case Maxwell::Blend::Factor::OneGL: return GL_ONE; case Maxwell::Blend::Factor::SourceColor: + case Maxwell::Blend::Factor::SourceColorGL: return GL_SRC_COLOR; case Maxwell::Blend::Factor::OneMinusSourceColor: + case Maxwell::Blend::Factor::OneMinusSourceColorGL: return GL_ONE_MINUS_SRC_COLOR; case Maxwell::Blend::Factor::SourceAlpha: + case Maxwell::Blend::Factor::SourceAlphaGL: return GL_SRC_ALPHA; case Maxwell::Blend::Factor::OneMinusSourceAlpha: + case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: return GL_ONE_MINUS_SRC_ALPHA; case Maxwell::Blend::Factor::DestAlpha: + case Maxwell::Blend::Factor::DestAlphaGL: return GL_DST_ALPHA; case Maxwell::Blend::Factor::OneMinusDestAlpha: + case Maxwell::Blend::Factor::OneMinusDestAlphaGL: return GL_ONE_MINUS_DST_ALPHA; case Maxwell::Blend::Factor::DestColor: + case Maxwell::Blend::Factor::DestColorGL: return GL_DST_COLOR; case Maxwell::Blend::Factor::OneMinusDestColor: + case Maxwell::Blend::Factor::OneMinusDestColorGL: return GL_ONE_MINUS_DST_COLOR; case Maxwell::Blend::Factor::SourceAlphaSaturate: + case Maxwell::Blend::Factor::SourceAlphaSaturateGL: return GL_SRC_ALPHA_SATURATE; case Maxwell::Blend::Factor::Source1Color: + case Maxwell::Blend::Factor::Source1ColorGL: return GL_SRC1_COLOR; case Maxwell::Blend::Factor::OneMinusSource1Color: + case Maxwell::Blend::Factor::OneMinusSource1ColorGL: return GL_ONE_MINUS_SRC1_COLOR; case Maxwell::Blend::Factor::Source1Alpha: + case Maxwell::Blend::Factor::Source1AlphaGL: return GL_SRC1_ALPHA; case Maxwell::Blend::Factor::OneMinusSource1Alpha: + case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: return GL_ONE_MINUS_SRC1_ALPHA; case Maxwell::Blend::Factor::ConstantColor: + case Maxwell::Blend::Factor::ConstantColorGL: return GL_CONSTANT_COLOR; case Maxwell::Blend::Factor::OneMinusConstantColor: + case Maxwell::Blend::Factor::OneMinusConstantColorGL: return GL_ONE_MINUS_CONSTANT_COLOR; case Maxwell::Blend::Factor::ConstantAlpha: + case Maxwell::Blend::Factor::ConstantAlphaGL: return GL_CONSTANT_ALPHA; case Maxwell::Blend::Factor::OneMinusConstantAlpha: + case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: return GL_ONE_MINUS_CONSTANT_ALPHA; } LOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bf9131193..95f1aa0fe 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -18,7 +18,6 @@ #include "core/tracer/recorder.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/utils.h" -#include "video_core/video_core.h" static const char vertex_shader[] = R"( #version 150 core @@ -92,7 +91,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons return matrix; } -ScopeAcquireGLContext::ScopeAcquireGLContext(EmuWindow& emu_window_) : emu_window{emu_window_} { +ScopeAcquireGLContext::ScopeAcquireGLContext(Core::Frontend::EmuWindow& emu_window_) + : emu_window{emu_window_} { if (Settings::values.use_multi_core) { emu_window.MakeCurrent(); } @@ -103,7 +103,9 @@ ScopeAcquireGLContext::~ScopeAcquireGLContext() { } } -RendererOpenGL::RendererOpenGL(EmuWindow& window) : VideoCore::RendererBase{window} {} +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window) + : VideoCore::RendererBase{window} {} + RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) @@ -430,7 +432,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum break; case GL_DEBUG_SEVERITY_NOTIFICATION: case GL_DEBUG_SEVERITY_LOW: - LOG_DEBUG(Render_OpenGL, format, str_source, str_type, id, message); + LOG_TRACE(Render_OpenGL, format, str_source, str_type, id, message); break; } } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 428afa3b7..a5eab6997 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -12,7 +12,9 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_state.h" +namespace Core::Frontend { class EmuWindow; +} /// Structure used for storing information about the textures for the Switch screen struct TextureInfo { @@ -34,16 +36,16 @@ struct ScreenInfo { /// Helper class to acquire/release OpenGL context within a given scope class ScopeAcquireGLContext : NonCopyable { public: - explicit ScopeAcquireGLContext(EmuWindow& window); + explicit ScopeAcquireGLContext(Core::Frontend::EmuWindow& window); ~ScopeAcquireGLContext(); private: - EmuWindow& emu_window; + Core::Frontend::EmuWindow& emu_window; }; class RendererOpenGL : public VideoCore::RendererBase { public: - explicit RendererOpenGL(EmuWindow& window); + explicit RendererOpenGL(Core::Frontend::EmuWindow& window); ~RendererOpenGL() override; /// Swap buffers (render frame) diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 65db84ad3..70746a34e 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -54,6 +54,7 @@ u32 BytesPerPixel(TextureFormat format) { return 8; case TextureFormat::DXT23: case TextureFormat::DXT45: + case TextureFormat::DXN2: case TextureFormat::BC7U: // In this case a 'pixel' actually refers to a 4x4 tile. return 16; @@ -85,87 +86,11 @@ u32 BytesPerPixel(TextureFormat format) { } } -static u32 DepthBytesPerPixel(DepthFormat format) { - switch (format) { - case DepthFormat::Z16_UNORM: - return 2; - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z32_FLOAT: - return 4; - case DepthFormat::Z32_S8_X24_FLOAT: - return 8; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } -} - -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, - u32 block_height) { - u8* data = Memory::GetPointer(address); - u32 bytes_per_pixel = BytesPerPixel(format); - +std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, + u32 height, u32 block_height) { std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); - - switch (format) { - case TextureFormat::DXT1: - case TextureFormat::DXT23: - case TextureFormat::DXT45: - case TextureFormat::DXN1: - case TextureFormat::BC7U: - // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel - // values. - CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - case TextureFormat::A8R8G8B8: - case TextureFormat::A2B10G10R10: - case TextureFormat::A1B5G5R5: - case TextureFormat::B5G6R5: - case TextureFormat::R8: - case TextureFormat::G8R8: - case TextureFormat::R16_G16_B16_A16: - case TextureFormat::R32_G32_B32_A32: - case TextureFormat::R32_G32: - case TextureFormat::R32: - case TextureFormat::R16: - case TextureFormat::R16_G16: - case TextureFormat::BF10GF11RF11: - case TextureFormat::ASTC_2D_4X4: - case TextureFormat::R32_G32_B32: - CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - - return unswizzled_data; -} - -std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 width, u32 height, - u32 block_height) { - u8* data = Memory::GetPointer(address); - u32 bytes_per_pixel = DepthBytesPerPixel(format); - - std::vector<u8> unswizzled_data(width * height * bytes_per_pixel); - - switch (format) { - case DepthFormat::Z16_UNORM: - case DepthFormat::S8_Z24_UNORM: - case DepthFormat::Z24_S8_UNORM: - case DepthFormat::Z32_FLOAT: - case DepthFormat::Z32_S8_X24_FLOAT: - CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data, - unswizzled_data.data(), true, block_height); - break; - default: - UNIMPLEMENTED_MSG("Format not implemented"); - break; - } - + CopySwizzledData(width / tile_size, height / tile_size, bytes_per_pixel, bytes_per_pixel, + Memory::GetPointer(address), unswizzled_data.data(), true, block_height); return unswizzled_data; } @@ -179,6 +104,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::DXT23: case TextureFormat::DXT45: case TextureFormat::DXN1: + case TextureFormat::DXN2: case TextureFormat::BC7U: case TextureFormat::ASTC_2D_4X4: case TextureFormat::A8R8G8B8: diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index 73a4924d1..1f7b731be 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -13,8 +13,8 @@ namespace Tegra::Texture { /** * Unswizzles a swizzled texture without changing its format. */ -std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height, - u32 block_height = TICEntry::DefaultBlockHeight); +std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size, u32 bytes_per_pixel, u32 width, + u32 height, u32 block_height = TICEntry::DefaultBlockHeight); /** * Unswizzles a swizzled depth texture without changing its format. diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 5085ef96b..6780d1c16 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -9,9 +9,7 @@ namespace VideoCore { -std::atomic<bool> g_toggle_framelimit_enabled; - -std::unique_ptr<RendererBase> CreateRenderer(EmuWindow& emu_window) { +std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window) { return std::make_unique<RendererOpenGL>(emu_window); } diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 7c01c0b8d..f79f85dfe 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -4,27 +4,22 @@ #pragma once -#include <atomic> #include <memory> +namespace Core::Frontend { class EmuWindow; +} namespace VideoCore { class RendererBase; -enum class Renderer { Software, OpenGL }; - -// TODO: Wrap these in a user settings struct along with any other graphics settings (often set from -// qt ui) -extern std::atomic<bool> g_toggle_framelimit_enabled; - /** * Creates a renderer instance. * * @note The returned renderer instance is simply allocated. Its Init() * function still needs to be called to fully complete its setup. */ -std::unique_ptr<RendererBase> CreateRenderer(EmuWindow& emu_window); +std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window); } // namespace VideoCore |