diff options
Diffstat (limited to 'src')
51 files changed, 1594 insertions, 1905 deletions
diff --git a/src/common/math_util.h b/src/common/math_util.h index d6c35ee89..83ef0201f 100644 --- a/src/common/math_util.h +++ b/src/common/math_util.h @@ -24,17 +24,29 @@ struct Rectangle { : left(left), top(top), right(right), bottom(bottom) {} T GetWidth() const { - return std::abs(static_cast<std::make_signed_t<T>>(right - left)); + if constexpr (std::is_floating_point_v<T>) { + return std::abs(right - left); + } else { + return std::abs(static_cast<std::make_signed_t<T>>(right - left)); + } } + T GetHeight() const { - return std::abs(static_cast<std::make_signed_t<T>>(bottom - top)); + if constexpr (std::is_floating_point_v<T>) { + return std::abs(bottom - top); + } else { + return std::abs(static_cast<std::make_signed_t<T>>(bottom - top)); + } } + Rectangle<T> TranslateX(const T x) const { return Rectangle{left + x, top, right + x, bottom}; } + Rectangle<T> TranslateY(const T y) const { return Rectangle{left, top + y, right, bottom + y}; } + Rectangle<T> Scale(const float s) const { return Rectangle{left, top, static_cast<T>(left + GetWidth() * s), static_cast<T>(top + GetHeight() * s)}; diff --git a/src/core/core.cpp b/src/core/core.cpp index a82faf127..218508126 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -174,6 +174,7 @@ struct System::Impl { } interrupt_manager = std::make_unique<Core::Hardware::InterruptManager>(system); gpu_core = VideoCore::CreateGPU(system); + renderer->Rasterizer().SetupDirtyFlags(); is_powered_on = true; exit_lock = false; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 4b0c6346f..14f3b4569 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -2,6 +2,8 @@ add_library(video_core STATIC buffer_cache/buffer_block.h buffer_cache/buffer_cache.h buffer_cache/map_interval.h + dirty_flags.cpp + dirty_flags.h dma_pusher.cpp dma_pusher.h engines/const_buffer_engine_interface.h @@ -69,8 +71,8 @@ add_library(video_core STATIC renderer_opengl/gl_shader_manager.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h - renderer_opengl/gl_state.cpp - renderer_opengl/gl_state.h + renderer_opengl/gl_state_tracker.cpp + renderer_opengl/gl_state_tracker.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp @@ -198,6 +200,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_staging_buffer_pool.cpp renderer_vulkan/vk_staging_buffer_pool.h + renderer_vulkan/vk_state_tracker.cpp + renderer_vulkan/vk_state_tracker.h renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp new file mode 100644 index 000000000..4429f3405 --- /dev/null +++ b/src/video_core/dirty_flags.cpp @@ -0,0 +1,46 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <cstddef> + +#include "common/common_types.h" +#include "video_core/dirty_flags.h" + +#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) +#define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / sizeof(u32)) + +namespace VideoCommon::Dirty { + +using Tegra::Engines::Maxwell3D; + +void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store) { + store[RenderTargets] = true; + store[ZetaBuffer] = true; + for (std::size_t i = 0; i < Maxwell3D::Regs::NumRenderTargets; ++i) { + store[ColorBuffer0 + i] = true; + } +} + +void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { + static constexpr std::size_t num_per_rt = NUM(rt[0]); + static constexpr std::size_t begin = OFF(rt); + static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; + for (std::size_t rt = 0; rt < Maxwell3D::Regs::NumRenderTargets; ++rt) { + FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt); + } + FillBlock(tables[1], begin, num, RenderTargets); + + static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets}; + for (std::size_t i = 0; i < std::size(zeta_flags); ++i) { + const u8 flag = zeta_flags[i]; + auto& table = tables[i]; + table[OFF(zeta_enable)] = flag; + table[OFF(zeta_width)] = flag; + table[OFF(zeta_height)] = flag; + FillBlock(table, OFF(zeta), NUM(zeta), flag); + } +} + +} // namespace VideoCommon::Dirty diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h new file mode 100644 index 000000000..0dbafd3ef --- /dev/null +++ b/src/video_core/dirty_flags.h @@ -0,0 +1,51 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <cstddef> +#include <iterator> + +#include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" + +namespace VideoCommon::Dirty { + +enum : u8 { + NullEntry = 0, + + RenderTargets, + ColorBuffer0, + ColorBuffer1, + ColorBuffer2, + ColorBuffer3, + ColorBuffer4, + ColorBuffer5, + ColorBuffer6, + ColorBuffer7, + ZetaBuffer, + + LastCommonEntry, +}; + +template <typename Integer> +void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Table& table, std::size_t begin, + std::size_t num, Integer dirty_index) { + const auto it = std::begin(table) + begin; + std::fill(it, it + num, static_cast<u8>(dirty_index)); +} + +template <typename Integer1, typename Integer2> +void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_t begin, + std::size_t num, Integer1 index_a, Integer2 index_b) { + FillBlock(tables[0], begin, num, index_a); + FillBlock(tables[1], begin, num, index_b); +} + +void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store); + +void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); + +} // namespace VideoCommon::Dirty diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 0094fd715..713c14182 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() { MICROPROFILE_SCOPE(DispatchCalls); // On entering GPU code, assume all memory may be touched by the ARM core. - gpu.Maxwell3D().dirty.OnMemoryWrite(); + gpu.Maxwell3D().OnMemoryWrite(); dma_pushbuffer_subindex = 0; diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 4b824aa4e..ae52afa79 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -39,7 +39,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { const bool is_last_call = method_call.IsLastCall(); upload_state.ProcessData(method_call.argument, is_last_call); if (is_last_call) { - system.GPU().Maxwell3D().dirty.OnMemoryWrite(); + system.GPU().Maxwell3D().OnMemoryWrite(); } break; } diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index fa4a7c5c1..597872e43 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { const bool is_last_call = method_call.IsLastCall(); upload_state.ProcessData(method_call.argument, is_last_call); if (is_last_call) { - system.GPU().Maxwell3D().dirty.OnMemoryWrite(); + system.GPU().Maxwell3D().OnMemoryWrite(); } break; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index b28de1092..89050361e 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -26,7 +26,8 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste MemoryManager& memory_manager) : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{*this}, upload_state{memory_manager, regs.upload} { - InitDirtySettings(); + dirty.flags.flip(); + InitializeRegisterDefaults(); } @@ -75,8 +76,8 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.stencil_back_mask = 0xFFFFFFFF; regs.depth_test_func = Regs::ComparisonOp::Always; - regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise; - regs.cull.cull_face = Regs::Cull::CullFace::Back; + regs.front_face = Regs::FrontFace::CounterClockWise; + regs.cull_face = Regs::CullFace::Back; // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a // register carrying a default value. Assume it's OpenGL's default (1). @@ -95,7 +96,7 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.rasterize_enable = 1; regs.rt_separate_frag_data = 1; regs.framebuffer_srgb = 1; - regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise; + regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; @@ -103,164 +104,6 @@ void Maxwell3D::InitializeRegisterDefaults() { mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true; } -#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name)) - -void Maxwell3D::InitDirtySettings() { - const auto set_block = [this](std::size_t start, std::size_t range, u8 position) { - const auto start_itr = dirty_pointers.begin() + start; - const auto end_itr = start_itr + range; - std::fill(start_itr, end_itr, position); - }; - dirty.regs.fill(true); - - // Init Render Targets - constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32); - constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt); - constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8; - u8 rt_dirty_reg = DIRTY_REGS_POS(render_target); - for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) { - set_block(rt_reg, registers_per_rt, rt_dirty_reg); - ++rt_dirty_reg; - } - constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer); - dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag; - dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag; - dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag; - constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32); - constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta); - set_block(zeta_reg, registers_in_zeta, depth_buffer_flag); - - // Init Vertex Arrays - constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array); - constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32); - constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays; - u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array); - u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); - for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end; - vertex_reg += vertex_array_size) { - set_block(vertex_reg, 3, va_dirty_reg); - // The divisor concerns vertex array instances - dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg; - ++va_dirty_reg; - ++vi_dirty_reg; - } - constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit); - constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32); - constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays; - va_dirty_reg = DIRTY_REGS_POS(vertex_array); - for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end; - vertex_reg += vertex_limit_size) { - set_block(vertex_reg, vertex_limit_size, va_dirty_reg); - va_dirty_reg++; - } - constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays); - constexpr u32 vertex_instance_size = - sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32); - constexpr u32 vertex_instance_end = - vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays; - vi_dirty_reg = DIRTY_REGS_POS(vertex_instance); - for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end; - vertex_reg += vertex_instance_size) { - set_block(vertex_reg, vertex_instance_size, vi_dirty_reg); - vi_dirty_reg++; - } - set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(), - DIRTY_REGS_POS(vertex_attrib_format)); - - // Init Shaders - constexpr u32 shader_registers_count = - sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); - set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, - DIRTY_REGS_POS(shaders)); - - // State - - // Viewport - constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport); - constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports); - constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32); - set_block(viewport_start, viewport_size, viewport_dirty_reg); - constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control); - constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32); - set_block(view_volume_start, view_volume_size, viewport_dirty_reg); - - // Viewport transformation - constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform); - constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32); - set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform)); - - // Cullmode - constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull); - constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32); - set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode)); - - // Screen y control - dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control); - - // Primitive Restart - constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart); - constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32); - set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart)); - - // Depth Test - constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test); - dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg; - - // Stencil Test - constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test); - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg; - - // Color Mask - constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask); - dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg; - set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32), - color_mask_dirty_reg); - // Blend State - constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state); - set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32), - blend_state_dirty_reg); - dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg; - set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg); - set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32), - blend_state_dirty_reg); - - // Scissor State - constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test); - set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32), - scissor_test_dirty_reg); - - // Polygon Offset - constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset); - dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg; - - // Depth bounds - constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values); - dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg; - dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg; -} - void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) { // Reset the current macro. executing_macro = 0; @@ -319,19 +162,9 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { if (regs.reg_array[method] != method_call.argument) { regs.reg_array[method] = method_call.argument; - const std::size_t dirty_reg = dirty_pointers[method]; - if (dirty_reg) { - dirty.regs[dirty_reg] = true; - if (dirty_reg >= DIRTY_REGS_POS(vertex_array) && - dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) { - dirty.vertex_array_buffers = true; - } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) && - dirty_reg < DIRTY_REGS_POS(vertex_instances)) { - dirty.vertex_instances = true; - } else if (dirty_reg >= DIRTY_REGS_POS(render_target) && - dirty_reg < DIRTY_REGS_POS(render_settings)) { - dirty.render_settings = true; - } + + for (const auto& table : dirty.tables) { + dirty.flags[table[method]] = true; } } @@ -419,7 +252,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { const bool is_last_call = method_call.IsLastCall(); upload_state.ProcessData(method_call.argument, is_last_call); if (is_last_call) { - dirty.OnMemoryWrite(); + OnMemoryWrite(); } break; } @@ -727,7 +560,7 @@ void Maxwell3D::FinishCBData() { const u32 id = cb_data_state.id; memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); - dirty.OnMemoryWrite(); + OnMemoryWrite(); cb_data_state.id = null_cb_data; cb_data_state.current = null_cb_data; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 6ea7cc6a5..491cff370 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -6,6 +6,7 @@ #include <array> #include <bitset> +#include <limits> #include <optional> #include <type_traits> #include <unordered_map> @@ -431,21 +432,15 @@ public: GeneratedPrimitives = 0x1F, }; - struct Cull { - enum class FrontFace : u32 { - ClockWise = 0x0900, - CounterClockWise = 0x0901, - }; - - enum class CullFace : u32 { - Front = 0x0404, - Back = 0x0405, - FrontAndBack = 0x0408, - }; + enum class FrontFace : u32 { + ClockWise = 0x0900, + CounterClockWise = 0x0901, + }; - u32 enabled; - FrontFace front_face; - CullFace cull_face; + enum class CullFace : u32 { + Front = 0x0404, + Back = 0x0405, + FrontAndBack = 0x0408, }; struct Blend { @@ -574,7 +569,7 @@ public: f32 translate_z; INSERT_UNION_PADDING_WORDS(2); - Common::Rectangle<s32> GetRect() const { + Common::Rectangle<f32> GetRect() const { return { GetX(), // left GetY() + GetHeight(), // top @@ -583,20 +578,20 @@ public: }; }; - s32 GetX() const { - return static_cast<s32>(std::max(0.0f, translate_x - std::fabs(scale_x))); + f32 GetX() const { + return std::max(0.0f, translate_x - std::fabs(scale_x)); } - s32 GetY() const { - return static_cast<s32>(std::max(0.0f, translate_y - std::fabs(scale_y))); + f32 GetY() const { + return std::max(0.0f, translate_y - std::fabs(scale_y)); } - s32 GetWidth() const { - return static_cast<s32>(translate_x + std::fabs(scale_x)) - GetX(); + f32 GetWidth() const { + return translate_x + std::fabs(scale_x) - GetX(); } - s32 GetHeight() const { - return static_cast<s32>(translate_y + std::fabs(scale_y)) - GetY(); + f32 GetHeight() const { + return translate_y + std::fabs(scale_y) - GetY(); } }; @@ -872,16 +867,7 @@ public: INSERT_UNION_PADDING_WORDS(0x35); - union { - BitField<0, 1, u32> c0; - BitField<1, 1, u32> c1; - BitField<2, 1, u32> c2; - BitField<3, 1, u32> c3; - BitField<4, 1, u32> c4; - BitField<5, 1, u32> c5; - BitField<6, 1, u32> c6; - BitField<7, 1, u32> c7; - } clip_distance_enabled; + u32 clip_distance_enabled; u32 samplecnt_enable; @@ -1060,7 +1046,9 @@ public: INSERT_UNION_PADDING_WORDS(1); - Cull cull; + u32 cull_test_enabled; + FrontFace front_face; + CullFace cull_face; u32 pixel_center_integer; @@ -1238,79 +1226,6 @@ public: State state{}; - struct DirtyRegs { - static constexpr std::size_t NUM_REGS = 256; - static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max()); - - union { - struct { - bool null_dirty; - - // Vertex Attributes - bool vertex_attrib_format; - - // Vertex Arrays - std::array<bool, 32> vertex_array; - - bool vertex_array_buffers; - - // Vertex Instances - std::array<bool, 32> vertex_instance; - - bool vertex_instances; - - // Render Targets - std::array<bool, 8> render_target; - bool depth_buffer; - - bool render_settings; - - // Shaders - bool shaders; - - // Rasterizer State - bool viewport; - bool clip_coefficient; - bool cull_mode; - bool primitive_restart; - bool depth_test; - bool stencil_test; - bool blend_state; - bool scissor_test; - bool transform_feedback; - bool color_mask; - bool polygon_offset; - bool depth_bounds_values; - - // Complementary - bool viewport_transform; - bool screen_y_control; - - bool memory_general; - }; - std::array<bool, NUM_REGS> regs; - }; - - void ResetVertexArrays() { - vertex_array.fill(true); - vertex_array_buffers = true; - } - - void ResetRenderTargets() { - depth_buffer = true; - render_target.fill(true); - render_settings = true; - } - - void OnMemoryWrite() { - shaders = true; - memory_general = true; - ResetRenderTargets(); - ResetVertexArrays(); - } - - } dirty{}; - /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -1356,6 +1271,11 @@ public: return execute_on; } + /// Notify a memory write has happened. + void OnMemoryWrite() { + dirty.flags |= dirty.on_write_stores; + } + enum class MMEDrawMode : u32 { Undefined, Array, @@ -1371,6 +1291,16 @@ public: u32 gl_end_count{}; } mme_draw; + struct DirtyState { + using Flags = std::bitset<std::numeric_limits<u8>::max()>; + using Table = std::array<u8, Regs::NUM_REGS>; + using Tables = std::array<Table, 2>; + + Flags flags; + Flags on_write_stores; + Tables tables{}; + } dirty; + private: void InitializeRegisterDefaults(); @@ -1417,8 +1347,6 @@ private: /// Retrieves information about a specific TSC entry from the TSC buffer. Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; - void InitDirtySettings(); - /** * Call a macro on this engine. * @param method Method to call @@ -1561,7 +1489,9 @@ ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(vp_point_size, 0x644); -ASSERT_REG_POSITION(cull, 0x646); +ASSERT_REG_POSITION(cull_test_enabled, 0x646); +ASSERT_REG_POSITION(front_face, 0x647); +ASSERT_REG_POSITION(cull_face, 0x648); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ad8453c5f..c2610f992 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -57,7 +57,7 @@ void MaxwellDMA::HandleCopy() { } // All copies here update the main memory, so mark all rasterizer states as invalid. - system.GPU().Maxwell3D().dirty.OnMemoryWrite(); + system.GPU().Maxwell3D().OnMemoryWrite(); if (regs.exec.is_dst_linear && regs.exec.is_src_linear) { // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f18eaf4bc..3e4514b94 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -89,6 +89,9 @@ public: virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, const DiskResourceLoadCallback& callback = {}) {} + /// Initializes renderer dirty flags + virtual void SetupDirtyFlags() {} + /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. GuestDriverProfile& AccessGuestDriverProfile() { return guest_driver_profile; diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp index 874ed3c6e..b8a512cb6 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" namespace OpenGL { @@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK framebuffer.Create(); // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. - local_state.draw.draw_framebuffer = framebuffer.handle; - local_state.ApplyFramebufferState(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); if (key.zeta) { const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h index 02ec80ae9..8f698fee0 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h @@ -13,7 +13,6 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" namespace OpenGL { @@ -63,7 +62,6 @@ public: private: OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - OpenGLState local_state; std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3fcd319fd..55324e6d5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -55,6 +55,8 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { +constexpr std::size_t NumSupportedVertexAttributes = 16; + template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, ShaderType shader_type, std::size_t index = 0) { @@ -88,18 +90,23 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, return buffer.size; } +void oglEnable(GLenum cap, bool state) { + (state ? glEnable : glDisable)(cap); +} + +void oglEnablei(GLenum cap, bool state, GLuint index) { + (state ? glEnablei : glDisablei)(cap, index); +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - ScreenInfo& info) - : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, + ScreenInfo& info, GLShader::ProgramManager& program_manager, + StateTracker& state_tracker) + : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, - screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { - shader_program_manager = std::make_unique<GLShader::ProgramManager>(); - state.draw.shader_program = 0; - state.Apply(); - - LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); + screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, + buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { CheckExtensions(); } @@ -113,93 +120,72 @@ void RasterizerOpenGL::CheckExtensions() { } } -GLuint RasterizerOpenGL::SetupVertexFormat() { +void RasterizerOpenGL::SetupVertexFormat() { auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; - - if (!gpu.dirty.vertex_attrib_format) { - return state.draw.vertex_array; + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::VertexFormats]) { + return; } - gpu.dirty.vertex_attrib_format = false; + flags[Dirty::VertexFormats] = false; MICROPROFILE_SCOPE(OpenGL_VAO); - auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); - auto& vao_entry = iter->second; - - if (is_cache_miss) { - vao_entry.Create(); - const GLuint vao = vao_entry.handle; - - // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob - // that fails to properly create the vertex array if it's not bound even after creating it - // with glCreateVertexArrays - state.draw.vertex_array = vao; - state.ApplyVertexArrayState(); - - // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. - // Enables the first 16 vertex attributes always, as we don't know which ones are actually - // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 - // for now to avoid OpenGL errors. - // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't - // assume every shader uses them all. - for (u32 index = 0; index < 16; ++index) { - const auto& attrib = regs.vertex_attrib_format[index]; - - // Ignore invalid attributes. - if (!attrib.IsValid()) - continue; - - const auto& buffer = regs.vertex_array[attrib.buffer]; - LOG_TRACE(Render_OpenGL, - "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", - index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), - attrib.offset.Value(), attrib.IsNormalized()); - - ASSERT(buffer.IsEnabled()); - - glEnableVertexArrayAttrib(vao, index); - if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt || - attrib.type == - Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) { - glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); - } else { - glVertexArrayAttribFormat( - vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), - attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); - } - glVertexArrayAttribBinding(vao, index, attrib.buffer); + // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables + // the first 16 vertex attributes always, as we don't know which ones are actually used until + // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to + // avoid OpenGL errors. + // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't + // assume every shader uses them all. + for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + if (!flags[Dirty::VertexFormat0 + index]) { + continue; } - } + flags[Dirty::VertexFormat0 + index] = false; + + const auto attrib = gpu.regs.vertex_attrib_format[index]; + const auto gl_index = static_cast<GLuint>(index); - // Rebinding the VAO invalidates the vertex buffer bindings. - gpu.dirty.ResetVertexArrays(); + // Ignore invalid attributes. + if (!attrib.IsValid()) { + glDisableVertexAttribArray(gl_index); + continue; + } + glEnableVertexAttribArray(gl_index); - state.draw.vertex_array = vao_entry.handle; - return vao_entry.handle; + if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || + attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { + glVertexAttribIFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexType(attrib), attrib.offset); + } else { + glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); + } + glVertexAttribBinding(gl_index, attrib.buffer); + } } -void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { +void RasterizerOpenGL::SetupVertexBuffer() { auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.dirty.vertex_array_buffers) + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::VertexBuffers]) { return; - gpu.dirty.vertex_array_buffers = false; - - const auto& regs = gpu.regs; + } + flags[Dirty::VertexBuffers] = false; MICROPROFILE_SCOPE(OpenGL_VB); // Upload all guest vertex arrays sequentially to our buffer - for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (!gpu.dirty.vertex_array[index]) + const auto& regs = gpu.regs; + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + if (!flags[Dirty::VertexBuffer0 + index]) { continue; - gpu.dirty.vertex_array[index] = false; - gpu.dirty.vertex_instance[index] = false; + } + flags[Dirty::VertexBuffer0 + index] = false; const auto& vertex_array = regs.vertex_array[index]; - if (!vertex_array.IsEnabled()) + if (!vertex_array.IsEnabled()) { continue; + } const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); @@ -209,42 +195,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, - vertex_array.stride); - - if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { - // Enable vertex buffer instancing with the specified divisor. - glVertexArrayBindingDivisor(vao, index, vertex_array.divisor); - } else { - // Disable the vertex buffer instancing. - glVertexArrayBindingDivisor(vao, index, 0); - } + vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer, + vertex_buffer_offset, vertex_array.stride); } } -void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { +void RasterizerOpenGL::SetupVertexInstances() { auto& gpu = system.GPU().Maxwell3D(); - - if (!gpu.dirty.vertex_instances) + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::VertexInstances]) { return; - gpu.dirty.vertex_instances = false; + } + flags[Dirty::VertexInstances] = false; const auto& regs = gpu.regs; - // Upload all guest vertex arrays sequentially to our buffer - for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (!gpu.dirty.vertex_instance[index]) + for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + if (!flags[Dirty::VertexInstance0 + index]) { continue; - - gpu.dirty.vertex_instance[index] = false; - - if (regs.instanced_arrays.IsInstancingEnabled(index) && - regs.vertex_array[index].divisor != 0) { - // Enable vertex buffer instancing with the specified divisor. - glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); - } else { - // Disable the vertex buffer instancing. - glVertexArrayBindingDivisor(vao, index, 0); } + flags[Dirty::VertexInstance0 + index] = false; + + const auto gl_index = static_cast<GLuint>(index); + const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index); + const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0; + glVertexBindingDivisor(gl_index, divisor); } } @@ -260,8 +234,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; @@ -271,10 +244,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { if (!gpu.regs.IsShaderConfigEnabled(index)) { switch (program) { case Maxwell::ShaderProgram::Geometry: - shader_program_manager->UseTrivialGeometryShader(); + program_manager.UseGeometryShader(0); break; case Maxwell::ShaderProgram::Fragment: - shader_program_manager->UseTrivialFragmentShader(); + program_manager.UseFragmentShader(0); break; default: break; @@ -305,13 +278,13 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: - shader_program_manager->UseProgrammableVertexShader(program_handle); + program_manager.UseVertexShader(program_handle); break; case Maxwell::ShaderProgram::Geometry: - shader_program_manager->UseProgrammableGeometryShader(program_handle); + program_manager.UseGeometryShader(program_handle); break; case Maxwell::ShaderProgram::Fragment: - shader_program_manager->UseProgrammableFragmentShader(program_handle); + program_manager.UseFragmentShader(program_handle); break; default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, @@ -322,9 +295,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the // clip distances only when it's written by a shader stage. - for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { - clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i]; - } + clip_distances |= shader->GetShaderEntries().clip_distances; // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { @@ -334,8 +305,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } SyncClipEnabled(clip_distances); - - gpu.dirty.shaders = false; + gpu.dirty.flags[Dirty::Shaders] = false; } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -368,20 +338,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, shader_cache.LoadDiskCache(stop_loading, callback); } +void RasterizerOpenGL::SetupDirtyFlags() { + state_tracker.Initialize(); +} + void RasterizerOpenGL::ConfigureFramebuffers() { MICROPROFILE_SCOPE(OpenGL_Framebuffer); auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.dirty.render_settings) { + if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) { return; } - gpu.dirty.render_settings = false; + gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; texture_cache.GuardRenderTargets(true); View depth_surface = texture_cache.GetDepthBufferSurface(true); const auto& regs = gpu.regs; - state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); // Bind the framebuffer surfaces @@ -409,14 +382,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() { texture_cache.GuardRenderTargets(false); - state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); - SyncViewport(state); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); } -void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, - bool using_depth_fb, bool using_stencil_fb) { - using VideoCore::Surface::SurfaceType; - +void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, + bool using_stencil_fb) { auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; @@ -435,80 +405,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo key.colors[0] = color_surface; key.zeta = depth_surface; - current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); - current_state.ApplyFramebufferState(); + state_tracker.NotifyFramebuffer(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); } void RasterizerOpenGL::Clear() { - const auto& maxwell3d = system.GPU().Maxwell3D(); - - if (!maxwell3d.ShouldExecute()) { + const auto& gpu = system.GPU().Maxwell3D(); + if (!gpu.ShouldExecute()) { return; } - const auto& regs = maxwell3d.regs; + const auto& regs = gpu.regs; bool use_color{}; bool use_depth{}; bool use_stencil{}; - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ - prev_state.AllDirty(); - prev_state.Apply(); - }); - - OpenGLState clear_state{OpenGLState::GetCurState()}; - clear_state.SetDefaultViewports(); if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A) { use_color = true; } if (use_color) { - clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; - clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; - clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; - clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; + state_tracker.NotifyColorMask0(); + glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, + regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); + + // TODO(Rodrigo): Determine if clamping is used on clears + SyncFragmentColorClampState(); + SyncFramebufferSRGB(); } if (regs.clear_buffers.Z) { ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); use_depth = true; - // Always enable the depth write when clearing the depth buffer. The depth write mask is - // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to - // true. - clear_state.depth.test_enabled = true; - clear_state.depth.test_func = GL_ALWAYS; - clear_state.depth.write_mask = GL_TRUE; + state_tracker.NotifyDepthMask(); + glDepthMask(GL_TRUE); } if (regs.clear_buffers.S) { - ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); + ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!"); use_stencil = true; - clear_state.stencil.test_enabled = true; - - if (regs.clear_flags.stencil) { - // Stencil affects the clear so fill it with the used masks - clear_state.stencil.front.test_func = GL_ALWAYS; - clear_state.stencil.front.test_mask = regs.stencil_front_func_mask; - clear_state.stencil.front.action_stencil_fail = GL_KEEP; - clear_state.stencil.front.action_depth_fail = GL_KEEP; - clear_state.stencil.front.action_depth_pass = GL_KEEP; - clear_state.stencil.front.write_mask = regs.stencil_front_mask; - if (regs.stencil_two_side_enable) { - clear_state.stencil.back.test_func = GL_ALWAYS; - clear_state.stencil.back.test_mask = regs.stencil_back_func_mask; - clear_state.stencil.back.action_stencil_fail = GL_KEEP; - clear_state.stencil.back.action_depth_fail = GL_KEEP; - clear_state.stencil.back.action_depth_pass = GL_KEEP; - clear_state.stencil.back.write_mask = regs.stencil_back_mask; - } else { - clear_state.stencil.back.test_func = GL_ALWAYS; - clear_state.stencil.back.test_mask = 0xFFFFFFFF; - clear_state.stencil.back.write_mask = 0xFFFFFFFF; - clear_state.stencil.back.action_stencil_fail = GL_KEEP; - clear_state.stencil.back.action_depth_fail = GL_KEEP; - clear_state.stencil.back.action_depth_pass = GL_KEEP; - } - } } if (!use_color && !use_depth && !use_stencil) { @@ -516,20 +450,18 @@ void RasterizerOpenGL::Clear() { return; } - ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); + SyncRasterizeEnable(); - SyncViewport(clear_state); - SyncRasterizeEnable(clear_state); if (regs.clear_flags.scissor) { - SyncScissorTest(clear_state); + SyncScissorTest(); + } else { + state_tracker.NotifyScissor0(); + glDisablei(GL_SCISSOR_TEST, 0); } - if (regs.clear_flags.viewport) { - clear_state.EmulateViewportWithScissor(); - } + UNIMPLEMENTED_IF(regs.clear_flags.viewport); - clear_state.AllDirty(); - clear_state.Apply(); + ConfigureClearFramebuffer(use_color, use_depth, use_stencil); if (use_color) { glClearBufferfv(GL_COLOR, 0, regs.clear_color); @@ -553,21 +485,24 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - SyncRasterizeEnable(state); + SyncViewport(); + SyncRasterizeEnable(); SyncColorMask(); SyncFragmentColorClampState(); SyncMultiSampleState(); SyncDepthTestState(); + SyncDepthClamp(); SyncStencilTestState(); SyncBlendState(); SyncLogicOpState(); SyncCullMode(); SyncPrimitiveRestart(); - SyncScissorTest(state); + SyncScissorTest(); SyncTransformFeedback(); SyncPointState(); SyncPolygonOffset(); SyncAlphaTest(); + SyncFramebufferSRGB(); buffer_cache.Acquire(); @@ -591,13 +526,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Map(buffer_size); // Prepare vertex array format. - const GLuint vao = SetupVertexFormat(); - vertex_array_pushbuffer.Setup(vao); + SetupVertexFormat(); + vertex_array_pushbuffer.Setup(); // Upload vertex and index data. - SetupVertexBuffer(vao); - SetupVertexInstances(vao); - + SetupVertexBuffer(); + SetupVertexInstances(); GLintptr index_buffer_offset; if (is_indexed) { index_buffer_offset = SetupIndexBuffer(); @@ -631,14 +565,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); - if (invalidate) { - // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty.ResetVertexArrays(); - } - gpu.dirty.memory_general = false; - - shader_program_manager->ApplyTo(state); - state.Apply(); + program_manager.Update(); if (texture_cache.TextureBarrier()) { glTextureBarrier(); @@ -700,8 +627,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z, launch_desc.shared_alloc, launch_desc.local_pos_alloc); - state.draw.shader_program = kernel->GetHandle(variant); - state.draw.program_pipeline = 0; + glUseProgramStages(program_manager.GetHandle(), GL_COMPUTE_SHADER_BIT, + kernel->GetHandle(variant)); const std::size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * @@ -719,11 +646,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); - state.ApplyTextures(); - state.ApplyImages(); - state.ApplyShaderProgram(); - state.ApplyProgramPipeline(); - glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -935,20 +857,20 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu const auto view = texture_cache.GetTextureSurface(texture.tic, entry); if (!view) { // Can occur when texture addr is null or its memory is unmapped/invalid - state.samplers[binding] = 0; - state.textures[binding] = 0; + glBindSampler(binding, 0); + glBindTextureUnit(binding, 0); return; } - state.textures[binding] = view->GetTexture(); + glBindTextureUnit(binding, view->GetTexture()); if (view->GetSurfaceParams().IsBuffer()) { return; } - state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); - // Apply swizzle to textures that are not buffers. view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, texture.tic.w_source); + + glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); } void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { @@ -974,7 +896,7 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t const GLShader::ImageEntry& entry) { const auto view = texture_cache.GetImageSurface(tic, entry); if (!view) { - state.images[binding] = 0; + glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); return; } if (!tic.IsBuffer()) { @@ -983,55 +905,85 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t if (entry.IsWritten()) { view->MarkAsModified(texture_cache.Tick()); } - state.images[binding] = view->GetTexture(); + glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, + view->GetFormat()); } -void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { - const auto& regs = system.GPU().Maxwell3D().regs; - const bool geometry_shaders_enabled = - regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); - const std::size_t viewport_count = - geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; - for (std::size_t i = 0; i < viewport_count; i++) { - auto& viewport = current_state.viewports[i]; - const auto& src = regs.viewports[i]; - const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; - viewport.x = viewport_rect.left; - viewport.y = viewport_rect.bottom; - viewport.width = viewport_rect.GetWidth(); - viewport.height = viewport_rect.GetHeight(); - viewport.depth_range_far = src.depth_range_far; - viewport.depth_range_near = src.depth_range_near; - } - state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; - state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; - - bool flip_y = false; - if (regs.viewport_transform[0].scale_y < 0.0) { - flip_y = !flip_y; - } - if (regs.screen_y_control.y_negate != 0) { - flip_y = !flip_y; - } - state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; - state.clip_control.depth_mode = - regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne - ? GL_ZERO_TO_ONE - : GL_NEGATIVE_ONE_TO_ONE; +void RasterizerOpenGL::SyncViewport() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + const auto& regs = gpu.regs; + + const bool dirty_viewport = flags[Dirty::Viewports]; + if (dirty_viewport || flags[Dirty::ClipControl]) { + flags[Dirty::ClipControl] = false; + + bool flip_y = false; + if (regs.viewport_transform[0].scale_y < 0.0) { + flip_y = !flip_y; + } + if (regs.screen_y_control.y_negate != 0) { + flip_y = !flip_y; + } + glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, + regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE + : GL_NEGATIVE_ONE_TO_ONE); + } + + if (dirty_viewport) { + flags[Dirty::Viewports] = false; + + const bool force = flags[Dirty::ViewportTransform]; + flags[Dirty::ViewportTransform] = false; + + for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { + if (!force && !flags[Dirty::Viewport0 + i]) { + continue; + } + flags[Dirty::Viewport0 + i] = false; + + const Common::Rectangle<f32> rect{regs.viewport_transform[i].GetRect()}; + glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), + rect.GetHeight()); + + const auto& src = regs.viewports[i]; + glDepthRangeIndexed(static_cast<GLuint>(i), static_cast<GLdouble>(src.depth_range_near), + static_cast<GLdouble>(src.depth_range_far)); + } + } } -void RasterizerOpenGL::SyncClipEnabled( - const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { +void RasterizerOpenGL::SyncDepthClamp() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::DepthClampEnabled]) { + return; + } + flags[Dirty::DepthClampEnabled] = false; - const auto& regs = system.GPU().Maxwell3D().regs; - const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ - regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, - regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, - regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, - regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; + const auto& state = gpu.regs.view_volume_clip_control; + UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near, + "Unimplemented depth clamp separation!"); + + oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near); +} + +void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + return; + } + flags[Dirty::ClipDistances] = false; + + clip_mask &= gpu.regs.clip_distance_enabled; + if (clip_mask == last_clip_distance_mask) { + return; + } + last_clip_distance_mask = clip_mask; for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { - state.clip_distance[i] = reg_state[i] && clip_mask[i]; + oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1); } } @@ -1040,199 +992,269 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + const auto& regs = gpu.regs; - state.cull.enabled = regs.cull.enabled != 0; - if (state.cull.enabled) { - state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); + if (flags[Dirty::CullTest]) { + flags[Dirty::CullTest] = false; + + if (regs.cull_test_enabled) { + glEnable(GL_CULL_FACE); + glCullFace(MaxwellToGL::CullFace(regs.cull_face)); + } else { + glDisable(GL_CULL_FACE); + } } - state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); + if (flags[Dirty::FrontFace]) { + flags[Dirty::FrontFace] = false; + glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); + } } void RasterizerOpenGL::SyncPrimitiveRestart() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PrimitiveRestart]) { + return; + } + flags[Dirty::PrimitiveRestart] = false; - state.primitive_restart.enabled = regs.primitive_restart.enabled; - state.primitive_restart.index = regs.primitive_restart.index; + if (gpu.regs.primitive_restart.enabled) { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(gpu.regs.primitive_restart.index); + } else { + glDisable(GL_PRIMITIVE_RESTART); + } } void RasterizerOpenGL::SyncDepthTestState() { - const auto& regs = system.GPU().Maxwell3D().regs; - - state.depth.test_enabled = regs.depth_test_enable != 0; - state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; - if (!state.depth.test_enabled) { - return; + const auto& regs = gpu.regs; + if (flags[Dirty::DepthMask]) { + flags[Dirty::DepthMask] = false; + glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE); } - state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); + if (flags[Dirty::DepthTest]) { + flags[Dirty::DepthTest] = false; + if (regs.depth_test_enable) { + glEnable(GL_DEPTH_TEST); + glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func)); + } else { + glDisable(GL_DEPTH_TEST); + } + } } void RasterizerOpenGL::SyncStencilTestState() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.stencil_test) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::StencilTest]) { return; } - maxwell3d.dirty.stencil_test = false; - - const auto& regs = maxwell3d.regs; - state.stencil.test_enabled = regs.stencil_enable != 0; - state.MarkDirtyStencilState(); + flags[Dirty::StencilTest] = false; + const auto& regs = gpu.regs; if (!regs.stencil_enable) { + glDisable(GL_STENCIL_TEST); return; } - state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); - state.stencil.front.test_ref = regs.stencil_front_func_ref; - state.stencil.front.test_mask = regs.stencil_front_func_mask; - state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail); - state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); - state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); - state.stencil.front.write_mask = regs.stencil_front_mask; + glEnable(GL_STENCIL_TEST); + glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), + regs.stencil_front_func_ref, regs.stencil_front_func_mask); + glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail), + MaxwellToGL::StencilOp(regs.stencil_front_op_zfail), + MaxwellToGL::StencilOp(regs.stencil_front_op_zpass)); + glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask); + if (regs.stencil_two_side_enable) { - state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); - state.stencil.back.test_ref = regs.stencil_back_func_ref; - state.stencil.back.test_mask = regs.stencil_back_func_mask; - state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); - state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); - state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); - state.stencil.back.write_mask = regs.stencil_back_mask; + glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func), + regs.stencil_back_func_ref, regs.stencil_back_func_mask); + glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail), + MaxwellToGL::StencilOp(regs.stencil_back_op_zfail), + MaxwellToGL::StencilOp(regs.stencil_back_op_zpass)); + glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask); } else { - state.stencil.back.test_func = GL_ALWAYS; - state.stencil.back.test_ref = 0; - state.stencil.back.test_mask = 0xFFFFFFFF; - state.stencil.back.write_mask = 0xFFFFFFFF; - state.stencil.back.action_stencil_fail = GL_KEEP; - state.stencil.back.action_depth_fail = GL_KEEP; - state.stencil.back.action_depth_pass = GL_KEEP; + glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF); + glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP); + glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF); } } -void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) { - const auto& regs = system.GPU().Maxwell3D().regs; - current_state.rasterizer_discard = regs.rasterize_enable == 0; +void RasterizerOpenGL::SyncRasterizeEnable() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::RasterizeEnable]) { + return; + } + flags[Dirty::RasterizeEnable] = false; + + oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0); } void RasterizerOpenGL::SyncColorMask() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.color_mask) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::ColorMasks]) { return; } - const auto& regs = maxwell3d.regs; + flags[Dirty::ColorMasks] = false; - const std::size_t count = - regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; - for (std::size_t i = 0; i < count; i++) { - const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i]; - auto& dest = state.color_mask[i]; - dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE; - dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE; - dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; - dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; + const bool force = flags[Dirty::ColorMaskCommon]; + flags[Dirty::ColorMaskCommon] = false; + + const auto& regs = gpu.regs; + if (regs.color_mask_common) { + if (!force && !flags[Dirty::ColorMask0]) { + return; + } + flags[Dirty::ColorMask0] = false; + + auto& mask = regs.color_mask[0]; + glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0); + return; } - state.MarkDirtyColorMask(); - maxwell3d.dirty.color_mask = false; + // Path without color_mask_common set + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + if (!force && !flags[Dirty::ColorMask0 + i]) { + continue; + } + flags[Dirty::ColorMask0 + i] = false; + + const auto& mask = regs.color_mask[i]; + glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0); + } } void RasterizerOpenGL::SyncMultiSampleState() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::MultisampleControl]) { + return; + } + flags[Dirty::MultisampleControl] = false; + const auto& regs = system.GPU().Maxwell3D().regs; - state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; - state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; + oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage); + oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one); } void RasterizerOpenGL::SyncFragmentColorClampState() { - const auto& regs = system.GPU().Maxwell3D().regs; - state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::FragmentClampColor]) { + return; + } + flags[Dirty::FragmentClampColor] = false; + + glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); } void RasterizerOpenGL::SyncBlendState() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.blend_state) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + const auto& regs = gpu.regs; + + if (flags[Dirty::BlendColor]) { + flags[Dirty::BlendColor] = false; + glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, + regs.blend_color.a); + } + + // TODO(Rodrigo): Revisit blending, there are several registers we are not reading + + if (!flags[Dirty::BlendStates]) { return; } - const auto& regs = maxwell3d.regs; - - state.blend_color.red = regs.blend_color.r; - state.blend_color.green = regs.blend_color.g; - state.blend_color.blue = regs.blend_color.b; - state.blend_color.alpha = regs.blend_color.a; - - state.independant_blend.enabled = regs.independent_blend_enable; - if (!state.independant_blend.enabled) { - auto& blend = state.blend[0]; - const auto& src = regs.blend; - blend.enabled = src.enable[0] != 0; - if (blend.enabled) { - blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); - blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); - blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); - blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); - blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); - blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); - } - for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - state.blend[i].enabled = false; + flags[Dirty::BlendStates] = false; + + if (!regs.independent_blend_enable) { + if (!regs.blend.enable[0]) { + glDisable(GL_BLEND); + return; } - maxwell3d.dirty.blend_state = false; - state.MarkDirtyBlendState(); + glEnable(GL_BLEND); + glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb), + MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb), + MaxwellToGL::BlendFunc(regs.blend.factor_source_a), + MaxwellToGL::BlendFunc(regs.blend.factor_dest_a)); + glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb), + MaxwellToGL::BlendEquation(regs.blend.equation_a)); return; } - for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - auto& blend = state.blend[i]; - const auto& src = regs.independent_blend[i]; - blend.enabled = regs.blend.enable[i] != 0; - if (!blend.enabled) + const bool force = flags[Dirty::BlendIndependentEnabled]; + flags[Dirty::BlendIndependentEnabled] = false; + + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + if (!force && !flags[Dirty::BlendState0 + i]) { continue; - blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); - blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); - blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); - blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); - blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); - blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); - } + } + flags[Dirty::BlendState0 + i] = false; + + if (!regs.blend.enable[i]) { + glDisablei(GL_BLEND, static_cast<GLuint>(i)); + continue; + } + glEnablei(GL_BLEND, static_cast<GLuint>(i)); - state.MarkDirtyBlendState(); - maxwell3d.dirty.blend_state = false; + const auto& src = regs.independent_blend[i]; + glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb), + MaxwellToGL::BlendFunc(src.factor_dest_rgb), + MaxwellToGL::BlendFunc(src.factor_source_a), + MaxwellToGL::BlendFunc(src.factor_dest_a)); + glBlendEquationSeparatei(static_cast<GLuint>(i), + MaxwellToGL::BlendEquation(src.equation_rgb), + MaxwellToGL::BlendEquation(src.equation_a)); + } } void RasterizerOpenGL::SyncLogicOpState() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::LogicOp]) { + return; + } + flags[Dirty::LogicOp] = false; - state.logic_op.enabled = regs.logic_op.enable != 0; + const auto& regs = gpu.regs; + if (regs.logic_op.enable) { + glEnable(GL_COLOR_LOGIC_OP); + glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation)); + } else { + glDisable(GL_COLOR_LOGIC_OP); + } +} - if (!state.logic_op.enabled) +void RasterizerOpenGL::SyncScissorTest() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::Scissors]) { return; + } + flags[Dirty::Scissors] = false; - ASSERT_MSG(regs.blend.enable[0] == 0, - "Blending and logic op can't be enabled at the same time."); - - state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); -} + const auto& regs = gpu.regs; + for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { + if (!flags[Dirty::Scissor0 + index]) { + continue; + } + flags[Dirty::Scissor0 + index] = false; -void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { - const auto& regs = system.GPU().Maxwell3D().regs; - const bool geometry_shaders_enabled = - regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); - const std::size_t viewport_count = - geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; - for (std::size_t i = 0; i < viewport_count; i++) { - const auto& src = regs.scissor_test[i]; - auto& dst = current_state.viewports[i].scissor; - dst.enabled = (src.enable != 0); - if (dst.enabled == 0) { - return; + const auto& src = regs.scissor_test[index]; + if (src.enable) { + glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); + glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y, + src.max_x - src.min_x, src.max_y - src.min_y); + } else { + glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); } - const u32 width = src.max_x - src.min_x; - const u32 height = src.max_y - src.min_y; - dst.x = src.min_x; - dst.y = src.min_y; - dst.width = width; - dst.height = height; } } @@ -1242,45 +1264,78 @@ void RasterizerOpenGL::SyncTransformFeedback() { } void RasterizerOpenGL::SyncPointState() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PointSize]) { + return; + } + flags[Dirty::PointSize] = false; + + oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable); + + if (gpu.regs.vp_point_size.enable) { + // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. + glEnable(GL_PROGRAM_POINT_SIZE); + return; + } + // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). - state.point.program_control = regs.vp_point_size.enable != 0; - state.point.sprite = regs.point_sprite_enable != 0; - state.point.size = std::max(1.0f, regs.point_size); + glPointSize(std::max(1.0f, gpu.regs.point_size)); + glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncPolygonOffset() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.polygon_offset) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PolygonOffset]) { return; } - const auto& regs = maxwell3d.regs; - - state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; - state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; - state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; + flags[Dirty::PolygonOffset] = false; - // Hardware divides polygon offset units by two - state.polygon_offset.units = regs.polygon_offset_units / 2.0f; - state.polygon_offset.factor = regs.polygon_offset_factor; - state.polygon_offset.clamp = regs.polygon_offset_clamp; + const auto& regs = gpu.regs; + oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable); + oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable); + oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable); - state.MarkDirtyPolygonOffset(); - maxwell3d.dirty.polygon_offset = false; + if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable || + regs.polygon_offset_point_enable) { + // Hardware divides polygon offset units by two + glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f, + regs.polygon_offset_clamp); + } } void RasterizerOpenGL::SyncAlphaTest() { - const auto& regs = system.GPU().Maxwell3D().regs; - UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, - "Alpha Testing is enabled with more than one rendertarget"); + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::AlphaTest]) { + return; + } + flags[Dirty::AlphaTest] = false; + + const auto& regs = gpu.regs; + if (regs.alpha_test_enabled && regs.rt_control.count > 1) { + LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); + } - state.alpha_test.enabled = regs.alpha_test_enabled; - if (!state.alpha_test.enabled) { + if (regs.alpha_test_enabled) { + glEnable(GL_ALPHA_TEST); + glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref); + } else { + glDisable(GL_ALPHA_TEST); + } +} + +void RasterizerOpenGL::SyncFramebufferSRGB() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::FramebufferSRGB]) { return; } - state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); - state.alpha_test.ref = regs.alpha_test_ref; + flags[Dirty::FramebufferSRGB] = false; + + oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 68abe9a21..b24c6661b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -30,7 +30,7 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/textures/texture.h" @@ -55,7 +55,8 @@ struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - ScreenInfo& info); + ScreenInfo& info, GLShader::ProgramManager& program_manager, + StateTracker& state_tracker); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -76,6 +77,7 @@ public: u32 pixel_stride) override; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + void SetupDirtyFlags() override; /// Returns true when there are commands queued to the OpenGL server. bool AnyCommandQueued() const { @@ -86,8 +88,7 @@ private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); - void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, - bool using_depth_fb, bool using_stencil_fb); + void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); @@ -130,11 +131,13 @@ private: const GLShader::ImageEntry& entry); /// Syncs the viewport and depth range to match the guest state - void SyncViewport(OpenGLState& current_state); + void SyncViewport(); + + /// Syncs the depth clamp state + void SyncDepthClamp(); /// Syncs the clip enabled status to match the guest state - void SyncClipEnabled( - const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask); + void SyncClipEnabled(u32 clip_mask); /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); @@ -164,7 +167,7 @@ private: void SyncMultiSampleState(); /// Syncs the scissor test state to match the guest state - void SyncScissorTest(OpenGLState& current_state); + void SyncScissorTest(); /// Syncs the transform feedback state to match the guest state void SyncTransformFeedback(); @@ -173,7 +176,7 @@ private: void SyncPointState(); /// Syncs the rasterizer enable state to match the guest state - void SyncRasterizeEnable(OpenGLState& current_state); + void SyncRasterizeEnable(); /// Syncs Color Mask void SyncColorMask(); @@ -184,6 +187,9 @@ private: /// Syncs the alpha test state to match the guest state void SyncAlphaTest(); + /// Syncs the framebuffer sRGB state to match the guest state + void SyncFramebufferSRGB(); + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); @@ -191,18 +197,17 @@ private: std::size_t CalculateIndexBufferSize() const; - /// Updates and returns a vertex array object representing current vertex format - GLuint SetupVertexFormat(); + /// Updates the current vertex format + void SetupVertexFormat(); - void SetupVertexBuffer(GLuint vao); - void SetupVertexInstances(GLuint vao); + void SetupVertexBuffer(); + void SetupVertexInstances(); GLintptr SetupIndexBuffer(); void SetupShaders(GLenum primitive_mode); const Device device; - OpenGLState state; TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; @@ -212,22 +217,20 @@ private: Core::System& system; ScreenInfo& screen_info; - - std::unique_ptr<GLShader::ProgramManager> shader_program_manager; - std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute, - Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>, - OGLVertexArray> - vertex_array_cache; + GLShader::ProgramManager& program_manager; + StateTracker& state_tracker; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - VertexArrayPushBuffer vertex_array_pushbuffer; + VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; + + u32 last_clip_distance_mask = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index c0aee770f..97803d480 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -8,7 +8,6 @@ #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" -#include "video_core/renderer_opengl/gl_state.h" MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); @@ -20,7 +19,7 @@ void OGLRenderbuffer::Create() { return; MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glGenRenderbuffers(1, &handle); + glCreateRenderbuffers(1, &handle); } void OGLRenderbuffer::Release() { @@ -29,7 +28,6 @@ void OGLRenderbuffer::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteRenderbuffers(1, &handle); - OpenGLState::GetCurState().ResetRenderbuffer(handle).Apply(); handle = 0; } @@ -47,7 +45,6 @@ void OGLTexture::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteTextures(1, &handle); - OpenGLState::GetCurState().UnbindTexture(handle).Apply(); handle = 0; } @@ -65,7 +62,6 @@ void OGLTextureView::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteTextures(1, &handle); - OpenGLState::GetCurState().UnbindTexture(handle).Apply(); handle = 0; } @@ -83,7 +79,6 @@ void OGLSampler::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteSamplers(1, &handle); - OpenGLState::GetCurState().ResetSampler(handle).Apply(); handle = 0; } @@ -127,7 +122,6 @@ void OGLProgram::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteProgram(handle); - OpenGLState::GetCurState().ResetProgram(handle).Apply(); handle = 0; } @@ -145,7 +139,6 @@ void OGLPipeline::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteProgramPipelines(1, &handle); - OpenGLState::GetCurState().ResetPipeline(handle).Apply(); handle = 0; } @@ -189,24 +182,6 @@ void OGLSync::Release() { handle = 0; } -void OGLVertexArray::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glCreateVertexArrays(1, &handle); -} - -void OGLVertexArray::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteVertexArrays(1, &handle); - OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); - handle = 0; -} - void OGLFramebuffer::Create() { if (handle != 0) return; @@ -221,7 +196,6 @@ void OGLFramebuffer::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteFramebuffers(1, &handle); - OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); handle = 0; } diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 995a4e45e..de93f4212 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -241,31 +241,6 @@ public: GLsync handle = 0; }; -class OGLVertexArray : private NonCopyable { -public: - OGLVertexArray() = default; - - OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLVertexArray() { - Release(); - } - - OGLVertexArray& operator=(OGLVertexArray&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new internal OpenGL resource and stores the handle - void Create(); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 489eb143c..4cb89db8c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,6 +22,7 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/shader/shader_ir.h" @@ -623,7 +624,7 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders( } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!system.GPU().Maxwell3D().dirty.shaders) { + if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { return last_shaders[static_cast<std::size_t>(program)]; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4735000b5..3a41ed30c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2547,7 +2547,10 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { for (const auto& image : ir.GetImages()) { entries.images.emplace_back(image); } - entries.clip_distances = ir.GetClipDistances(); + const auto clip_distances = ir.GetClipDistances(); + for (std::size_t i = 0; i < std::size(clip_distances); ++i) { + entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; + } entries.shader_length = ir.GetLength(); return entries; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 7876f48d6..0f692c1db 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -74,7 +74,7 @@ struct ShaderEntries { std::vector<GlobalMemoryEntry> global_memory_entries; std::vector<SamplerEntry> samplers; std::vector<ImageEntry> images; - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + u32 clip_distances{}; std::size_t shader_length{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 75d3fac04..15f3cd066 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -10,27 +10,21 @@ namespace OpenGL::GLShader { using Tegra::Engines::Maxwell3D; -ProgramManager::ProgramManager() { - pipeline.Create(); -} - ProgramManager::~ProgramManager() = default; -void ProgramManager::ApplyTo(OpenGLState& state) { - UpdatePipeline(); - state.draw.shader_program = 0; - state.draw.program_pipeline = pipeline.handle; +void ProgramManager::Create() { + pipeline.Create(); } -void ProgramManager::UpdatePipeline() { +void ProgramManager::Update() { // Avoid updating the pipeline when values have no changed if (old_state == current_state) { return; } // Workaround for AMD bug - constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | - GL_FRAGMENT_SHADER_BIT}; + static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | + GL_FRAGMENT_SHADER_BIT}; glUseProgramStages(pipeline.handle, all_used_stages, 0); glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 478c165ce..e94cd75aa 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -9,7 +9,6 @@ #include <glad/glad.h> #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL::GLShader { @@ -29,25 +28,26 @@ static_assert(sizeof(MaxwellUniformData) < 16384, class ProgramManager { public: - explicit ProgramManager(); ~ProgramManager(); - void ApplyTo(OpenGLState& state); + void Create(); - void UseProgrammableVertexShader(GLuint program) { + void Update(); + + void UseVertexShader(GLuint program) { current_state.vertex_shader = program; } - void UseProgrammableGeometryShader(GLuint program) { + void UseGeometryShader(GLuint program) { current_state.geometry_shader = program; } - void UseProgrammableFragmentShader(GLuint program) { + void UseFragmentShader(GLuint program) { current_state.fragment_shader = program; } - void UseTrivialGeometryShader() { - current_state.geometry_shader = 0; + GLuint GetHandle() const { + return pipeline.handle; } void UseTrivialFragmentShader() { @@ -70,8 +70,6 @@ private: GLuint geometry_shader{}; }; - void UpdatePipeline(); - OGLPipeline pipeline; PipelineState current_state; PipelineState old_state; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp deleted file mode 100644 index 7d3bc1a1f..000000000 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ /dev/null @@ -1,569 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <iterator> -#include <glad/glad.h> -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "video_core/renderer_opengl/gl_state.h" - -MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -OpenGLState OpenGLState::cur_state; - -namespace { - -template <typename T> -bool UpdateValue(T& current_value, const T new_value) { - const bool changed = current_value != new_value; - current_value = new_value; - return changed; -} - -template <typename T1, typename T2> -bool UpdateTie(T1 current_value, const T2 new_value) { - const bool changed = current_value != new_value; - current_value = new_value; - return changed; -} - -template <typename T> -std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { - std::optional<std::size_t> first; - std::size_t last; - for (std::size_t i = 0; i < std::size(current_values); ++i) { - if (!UpdateValue(current_values[i], new_values[i])) { - continue; - } - if (!first) { - first = i; - } - last = i; - } - if (!first) { - return std::nullopt; - } - return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); -} - -void Enable(GLenum cap, bool enable) { - if (enable) { - glEnable(cap); - } else { - glDisable(cap); - } -} - -void Enable(GLenum cap, GLuint index, bool enable) { - if (enable) { - glEnablei(cap, index); - } else { - glDisablei(cap, index); - } -} - -void Enable(GLenum cap, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) { - Enable(cap, new_value); - } -} - -void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) { - Enable(cap, index, new_value); - } -} - -} // Anonymous namespace - -OpenGLState::OpenGLState() = default; - -void OpenGLState::SetDefaultViewports() { - viewports.fill(Viewport{}); - - depth_clamp.far_plane = false; - depth_clamp.near_plane = false; -} - -void OpenGLState::ApplyFramebufferState() { - if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); - } - if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); - } -} - -void OpenGLState::ApplyVertexArrayState() { - if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { - glBindVertexArray(draw.vertex_array); - } -} - -void OpenGLState::ApplyShaderProgram() { - if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { - glUseProgram(draw.shader_program); - } -} - -void OpenGLState::ApplyProgramPipeline() { - if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { - glBindProgramPipeline(draw.program_pipeline); - } -} - -void OpenGLState::ApplyClipDistances() { - for (std::size_t i = 0; i < clip_distance.size(); ++i) { - Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], - clip_distance[i]); - } -} - -void OpenGLState::ApplyPointSize() { - Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); - Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite); - if (UpdateValue(cur_state.point.size, point.size)) { - glPointSize(point.size); - } -} - -void OpenGLState::ApplyFragmentColorClamp() { - if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { - glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, - fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); - } -} - -void OpenGLState::ApplyMultisample() { - Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, - multisample_control.alpha_to_coverage); - Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, - multisample_control.alpha_to_one); -} - -void OpenGLState::ApplyDepthClamp() { - if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && - depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { - return; - } - cur_state.depth_clamp = depth_clamp; - - UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, - "Unimplemented Depth Clamp Separation!"); - - Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); -} - -void OpenGLState::ApplySRgb() { - if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) - return; - cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; - if (framebuffer_srgb.enabled) { - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); - } -} - -void OpenGLState::ApplyCulling() { - Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); - - if (UpdateValue(cur_state.cull.mode, cull.mode)) { - glCullFace(cull.mode); - } - - if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { - glFrontFace(cull.front_face); - } -} - -void OpenGLState::ApplyRasterizerDiscard() { - Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard); -} - -void OpenGLState::ApplyColorMask() { - if (!dirty.color_mask) { - return; - } - dirty.color_mask = false; - - for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { - const auto& updated = color_mask[i]; - auto& current = cur_state.color_mask[i]; - if (updated.red_enabled != current.red_enabled || - updated.green_enabled != current.green_enabled || - updated.blue_enabled != current.blue_enabled || - updated.alpha_enabled != current.alpha_enabled) { - current = updated; - glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, - updated.blue_enabled, updated.alpha_enabled); - } - } -} - -void OpenGLState::ApplyDepth() { - Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); - - if (cur_state.depth.test_func != depth.test_func) { - cur_state.depth.test_func = depth.test_func; - glDepthFunc(depth.test_func); - } - - if (cur_state.depth.write_mask != depth.write_mask) { - cur_state.depth.write_mask = depth.write_mask; - glDepthMask(depth.write_mask); - } -} - -void OpenGLState::ApplyPrimitiveRestart() { - Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); - - if (cur_state.primitive_restart.index != primitive_restart.index) { - cur_state.primitive_restart.index = primitive_restart.index; - glPrimitiveRestartIndex(primitive_restart.index); - } -} - -void OpenGLState::ApplyStencilTest() { - if (!dirty.stencil_state) { - return; - } - dirty.stencil_state = false; - - Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); - - const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { - if (current.test_func != config.test_func || current.test_ref != config.test_ref || - current.test_mask != config.test_mask) { - current.test_func = config.test_func; - current.test_ref = config.test_ref; - current.test_mask = config.test_mask; - glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); - } - if (current.action_depth_fail != config.action_depth_fail || - current.action_depth_pass != config.action_depth_pass || - current.action_stencil_fail != config.action_stencil_fail) { - current.action_depth_fail = config.action_depth_fail; - current.action_depth_pass = config.action_depth_pass; - current.action_stencil_fail = config.action_stencil_fail; - glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, - config.action_depth_pass); - } - if (current.write_mask != config.write_mask) { - current.write_mask = config.write_mask; - glStencilMaskSeparate(face, config.write_mask); - } - }; - ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); - ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); -} - -void OpenGLState::ApplyViewport() { - for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { - const auto& updated = viewports[i]; - auto& current = cur_state.viewports[i]; - - if (current.x != updated.x || current.y != updated.y || current.width != updated.width || - current.height != updated.height) { - current.x = updated.x; - current.y = updated.y; - current.width = updated.width; - current.height = updated.height; - glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), - static_cast<GLfloat>(updated.width), - static_cast<GLfloat>(updated.height)); - } - if (current.depth_range_near != updated.depth_range_near || - current.depth_range_far != updated.depth_range_far) { - current.depth_range_near = updated.depth_range_near; - current.depth_range_far = updated.depth_range_far; - glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); - } - - Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); - - if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || - current.scissor.width != updated.scissor.width || - current.scissor.height != updated.scissor.height) { - current.scissor.x = updated.scissor.x; - current.scissor.y = updated.scissor.y; - current.scissor.width = updated.scissor.width; - current.scissor.height = updated.scissor.height; - glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); - } - } -} - -void OpenGLState::ApplyGlobalBlending() { - const Blend& updated = blend[0]; - Blend& current = cur_state.blend[0]; - - Enable(GL_BLEND, current.enabled, updated.enabled); - - if (current.src_rgb_func != updated.src_rgb_func || - current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || - current.dst_a_func != updated.dst_a_func) { - current.src_rgb_func = updated.src_rgb_func; - current.dst_rgb_func = updated.dst_rgb_func; - current.src_a_func = updated.src_a_func; - current.dst_a_func = updated.dst_a_func; - glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, - updated.dst_a_func); - } - - if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { - current.rgb_equation = updated.rgb_equation; - current.a_equation = updated.a_equation; - glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); - } -} - -void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) { - const Blend& updated = blend[target]; - Blend& current = cur_state.blend[target]; - - if (current.enabled != updated.enabled || force) { - current.enabled = updated.enabled; - Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); - } - - if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, - current.dst_a_func), - std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, - updated.dst_a_func))) { - glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, - updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); - } - - if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), - std::tie(updated.rgb_equation, updated.a_equation))) { - glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, - updated.a_equation); - } -} - -void OpenGLState::ApplyBlending() { - if (!dirty.blend_state) { - return; - } - dirty.blend_state = false; - - if (independant_blend.enabled) { - const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; - for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { - ApplyTargetBlending(target, force); - } - } else { - ApplyGlobalBlending(); - } - cur_state.independant_blend.enabled = independant_blend.enabled; - - if (UpdateTie( - std::tie(cur_state.blend_color.red, cur_state.blend_color.green, - cur_state.blend_color.blue, cur_state.blend_color.alpha), - std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { - glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); - } -} - -void OpenGLState::ApplyLogicOp() { - Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); - - if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { - glLogicOp(logic_op.operation); - } -} - -void OpenGLState::ApplyPolygonOffset() { - if (!dirty.polygon_offset) { - return; - } - dirty.polygon_offset = false; - - Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, - polygon_offset.fill_enable); - Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, - polygon_offset.line_enable); - Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, - polygon_offset.point_enable); - - if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, - cur_state.polygon_offset.clamp), - std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { - if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { - glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); - } else { - UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, - "Unimplemented Depth polygon offset clamp."); - glPolygonOffset(polygon_offset.factor, polygon_offset.units); - } - } -} - -void OpenGLState::ApplyAlphaTest() { - Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); - if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), - std::tie(alpha_test.func, alpha_test.ref))) { - glAlphaFunc(alpha_test.func, alpha_test.ref); - } -} - -void OpenGLState::ApplyClipControl() { - if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode), - std::tie(clip_control.origin, clip_control.depth_mode))) { - glClipControl(clip_control.origin, clip_control.depth_mode); - } -} - -void OpenGLState::ApplyRenderBuffer() { - if (cur_state.renderbuffer != renderbuffer) { - cur_state.renderbuffer = renderbuffer; - glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer); - } -} - -void OpenGLState::ApplyTextures() { - const std::size_t size = std::size(textures); - for (std::size_t i = 0; i < size; ++i) { - if (UpdateValue(cur_state.textures[i], textures[i])) { - // BindTextureUnit doesn't support binding null textures, skip those binds. - // TODO(Rodrigo): Stop using null textures - if (textures[i] != 0) { - glBindTextureUnit(static_cast<GLuint>(i), textures[i]); - } - } - } -} - -void OpenGLState::ApplySamplers() { - const std::size_t size = std::size(samplers); - for (std::size_t i = 0; i < size; ++i) { - if (UpdateValue(cur_state.samplers[i], samplers[i])) { - glBindSampler(static_cast<GLuint>(i), samplers[i]); - } - } -} - -void OpenGLState::ApplyImages() { - if (const auto update = UpdateArray(cur_state.images, images)) { - glBindImageTextures(update->first, update->second, images.data() + update->first); - } -} - -void OpenGLState::Apply() { - MICROPROFILE_SCOPE(OpenGL_State); - ApplyFramebufferState(); - ApplyVertexArrayState(); - ApplyShaderProgram(); - ApplyProgramPipeline(); - ApplyClipDistances(); - ApplyPointSize(); - ApplyFragmentColorClamp(); - ApplyMultisample(); - ApplyRasterizerDiscard(); - ApplyColorMask(); - ApplyDepthClamp(); - ApplyViewport(); - ApplyStencilTest(); - ApplySRgb(); - ApplyCulling(); - ApplyDepth(); - ApplyPrimitiveRestart(); - ApplyBlending(); - ApplyLogicOp(); - ApplyTextures(); - ApplySamplers(); - ApplyImages(); - ApplyPolygonOffset(); - ApplyAlphaTest(); - ApplyClipControl(); - ApplyRenderBuffer(); -} - -void OpenGLState::EmulateViewportWithScissor() { - auto& current = viewports[0]; - if (current.scissor.enabled) { - const GLint left = std::max(current.x, current.scissor.x); - const GLint right = - std::max(current.x + current.width, current.scissor.x + current.scissor.width); - const GLint bottom = std::max(current.y, current.scissor.y); - const GLint top = - std::max(current.y + current.height, current.scissor.y + current.scissor.height); - current.scissor.x = std::max(left, 0); - current.scissor.y = std::max(bottom, 0); - current.scissor.width = std::max(right - left, 0); - current.scissor.height = std::max(top - bottom, 0); - } else { - current.scissor.enabled = true; - current.scissor.x = current.x; - current.scissor.y = current.y; - current.scissor.width = current.width; - current.scissor.height = current.height; - } -} - -OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { - for (auto& texture : textures) { - if (texture == handle) { - texture = 0; - } - } - return *this; -} - -OpenGLState& OpenGLState::ResetSampler(GLuint handle) { - for (auto& sampler : samplers) { - if (sampler == handle) { - sampler = 0; - } - } - return *this; -} - -OpenGLState& OpenGLState::ResetProgram(GLuint handle) { - if (draw.shader_program == handle) { - draw.shader_program = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetPipeline(GLuint handle) { - if (draw.program_pipeline == handle) { - draw.program_pipeline = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { - if (draw.vertex_array == handle) { - draw.vertex_array = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { - if (draw.read_framebuffer == handle) { - draw.read_framebuffer = 0; - } - if (draw.draw_framebuffer == handle) { - draw.draw_framebuffer = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetRenderbuffer(GLuint handle) { - if (renderbuffer == handle) { - renderbuffer = 0; - } - return *this; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h deleted file mode 100644 index bce662f2c..000000000 --- a/src/video_core/renderer_opengl/gl_state.h +++ /dev/null @@ -1,251 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <type_traits> -#include <glad/glad.h> -#include "video_core/engines/maxwell_3d.h" - -namespace OpenGL { - -class OpenGLState { -public: - struct { - bool enabled = false; // GL_FRAMEBUFFER_SRGB - } framebuffer_srgb; - - struct { - bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE - bool alpha_to_one = false; // GL_ALPHA_TO_ONE - } multisample_control; - - struct { - bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB - } fragment_color_clamp; - - struct { - bool far_plane = false; - bool near_plane = false; - } depth_clamp; // GL_DEPTH_CLAMP - - struct { - bool enabled = false; // GL_CULL_FACE - GLenum mode = GL_BACK; // GL_CULL_FACE_MODE - GLenum front_face = GL_CCW; // GL_FRONT_FACE - } cull; - - struct { - bool test_enabled = false; // GL_DEPTH_TEST - GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK - GLenum test_func = GL_LESS; // GL_DEPTH_FUNC - } depth; - - struct { - bool enabled = false; - GLuint index = 0; - } primitive_restart; // GL_PRIMITIVE_RESTART - - bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD - - struct ColorMask { - GLboolean red_enabled = GL_TRUE; - GLboolean green_enabled = GL_TRUE; - GLboolean blue_enabled = GL_TRUE; - GLboolean alpha_enabled = GL_TRUE; - }; - std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> - color_mask; // GL_COLOR_WRITEMASK - - struct { - bool test_enabled = false; // GL_STENCIL_TEST - struct { - GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC - GLint test_ref = 0; // GL_STENCIL_REF - GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK - GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK - GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL - GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL - GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS - } front, back; - } stencil; - - struct Blend { - bool enabled = false; // GL_BLEND - GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB - GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA - GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB - GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB - GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA - GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA - }; - std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend; - - struct { - bool enabled = false; - } independant_blend; - - struct { - GLclampf red = 0.0f; - GLclampf green = 0.0f; - GLclampf blue = 0.0f; - GLclampf alpha = 0.0f; - } blend_color; // GL_BLEND_COLOR - - struct { - bool enabled = false; // GL_LOGIC_OP_MODE - GLenum operation = GL_COPY; - } logic_op; - - static constexpr std::size_t NumSamplers = 32 * 5; - static constexpr std::size_t NumImages = 8 * 5; - std::array<GLuint, NumSamplers> textures = {}; - std::array<GLuint, NumSamplers> samplers = {}; - std::array<GLuint, NumImages> images = {}; - - struct { - GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING - GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING - GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING - GLuint shader_program = 0; // GL_CURRENT_PROGRAM - GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING - } draw; - - struct Viewport { - GLint x = 0; - GLint y = 0; - GLint width = 0; - GLint height = 0; - GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE - GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE - struct { - bool enabled = false; // GL_SCISSOR_TEST - GLint x = 0; - GLint y = 0; - GLsizei width = 0; - GLsizei height = 0; - } scissor; - }; - std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; - - struct { - bool program_control = false; // GL_PROGRAM_POINT_SIZE - bool sprite = false; // GL_POINT_SPRITE - GLfloat size = 1.0f; // GL_POINT_SIZE - } point; - - struct { - bool point_enable = false; - bool line_enable = false; - bool fill_enable = false; - GLfloat units = 0.0f; - GLfloat factor = 0.0f; - GLfloat clamp = 0.0f; - } polygon_offset; - - struct { - bool enabled = false; // GL_ALPHA_TEST - GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC - GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF - } alpha_test; - - std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE - - struct { - GLenum origin = GL_LOWER_LEFT; - GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE; - } clip_control; - - GLuint renderbuffer{}; // GL_RENDERBUFFER_BINDING - - OpenGLState(); - - /// Get the currently active OpenGL state - static OpenGLState GetCurState() { - return cur_state; - } - - void SetDefaultViewports(); - /// Apply this state as the current OpenGL state - void Apply(); - - void ApplyFramebufferState(); - void ApplyVertexArrayState(); - void ApplyShaderProgram(); - void ApplyProgramPipeline(); - void ApplyClipDistances(); - void ApplyPointSize(); - void ApplyFragmentColorClamp(); - void ApplyMultisample(); - void ApplySRgb(); - void ApplyCulling(); - void ApplyRasterizerDiscard(); - void ApplyColorMask(); - void ApplyDepth(); - void ApplyPrimitiveRestart(); - void ApplyStencilTest(); - void ApplyViewport(); - void ApplyTargetBlending(std::size_t target, bool force); - void ApplyGlobalBlending(); - void ApplyBlending(); - void ApplyLogicOp(); - void ApplyTextures(); - void ApplySamplers(); - void ApplyImages(); - void ApplyDepthClamp(); - void ApplyPolygonOffset(); - void ApplyAlphaTest(); - void ApplyClipControl(); - void ApplyRenderBuffer(); - - /// Resets any references to the given resource - OpenGLState& UnbindTexture(GLuint handle); - OpenGLState& ResetSampler(GLuint handle); - OpenGLState& ResetProgram(GLuint handle); - OpenGLState& ResetPipeline(GLuint handle); - OpenGLState& ResetVertexArray(GLuint handle); - OpenGLState& ResetFramebuffer(GLuint handle); - OpenGLState& ResetRenderbuffer(GLuint handle); - - /// Viewport does not affects glClearBuffer so emulate viewport using scissor test - void EmulateViewportWithScissor(); - - void MarkDirtyBlendState() { - dirty.blend_state = true; - } - - void MarkDirtyStencilState() { - dirty.stencil_state = true; - } - - void MarkDirtyPolygonOffset() { - dirty.polygon_offset = true; - } - - void MarkDirtyColorMask() { - dirty.color_mask = true; - } - - void AllDirty() { - dirty.blend_state = true; - dirty.stencil_state = true; - dirty.polygon_offset = true; - dirty.color_mask = true; - } - -private: - static OpenGLState cur_state; - - struct { - bool blend_state; - bool stencil_state; - bool viewport_state; - bool polygon_offset; - bool color_mask; - } dirty{}; -}; -static_assert(std::is_trivially_copyable_v<OpenGLState>); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp new file mode 100644 index 000000000..1e43c9ec0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -0,0 +1,238 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <cstddef> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" + +#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) + +namespace OpenGL { + +namespace { + +using namespace Dirty; +using namespace VideoCommon::Dirty; +using Tegra::Engines::Maxwell3D; +using Regs = Maxwell3D::Regs; +using Tables = Maxwell3D::DirtyState::Tables; +using Table = Maxwell3D::DirtyState::Table; + +void SetupDirtyColorMasks(Tables& tables) { + tables[0][OFF(color_mask_common)] = ColorMaskCommon; + for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) { + const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]); + FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt); + } + + FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); +} + +void SetupDirtyVertexArrays(Tables& tables) { + static constexpr std::size_t num_array = 3; + static constexpr std::size_t instance_base_offset = 3; + for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); + const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); + + FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); + FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); + + const std::size_t instance_array_offset = array_offset + instance_base_offset; + tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); + tables[1][instance_array_offset] = VertexInstances; + + const std::size_t instance_offset = OFF(instanced_arrays) + i; + tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i); + tables[1][instance_offset] = VertexInstances; + } +} + +void SetupDirtyVertexFormat(Tables& tables) { + for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i); + } + + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats); +} + +void SetupDirtyViewports(Tables& tables) { + for (std::size_t i = 0; i < Regs::NumViewports; ++i) { + const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]); + const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]); + + FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i); + FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i); + } + + FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports); + FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports); + + tables[0][OFF(viewport_transform_enabled)] = ViewportTransform; + tables[1][OFF(viewport_transform_enabled)] = Viewports; +} + +void SetupDirtyScissors(Tables& tables) { + for (std::size_t i = 0; i < Regs::NumViewports; ++i) { + const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]); + FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i); + } + FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); +} + +void SetupDirtyShaders(Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, + Shaders); +} + +void SetupDirtyDepthTest(Tables& tables) { + auto& table = tables[0]; + table[OFF(depth_test_enable)] = DepthTest; + table[OFF(depth_write_enabled)] = DepthMask; + table[OFF(depth_test_func)] = DepthTest; +} + +void SetupDirtyStencilTest(Tables& tables) { + static constexpr std::array offsets = { + OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref), + OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail), + OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable), + OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask), + OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass), + OFF(stencil_back_mask)}; + for (const auto offset : offsets) { + tables[0][offset] = StencilTest; + } +} + +void SetupDirtyAlphaTest(Tables& tables) { + auto& table = tables[0]; + table[OFF(alpha_test_ref)] = AlphaTest; + table[OFF(alpha_test_func)] = AlphaTest; + table[OFF(alpha_test_enabled)] = AlphaTest; +} + +void SetupDirtyBlend(Tables& tables) { + FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor); + + tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled; + + for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) { + const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]); + FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i); + + tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i); + } + FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates); + FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates); +} + +void SetupDirtyPrimitiveRestart(Tables& tables) { + FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart); +} + +void SetupDirtyPolygonOffset(Tables& tables) { + auto& table = tables[0]; + table[OFF(polygon_offset_fill_enable)] = PolygonOffset; + table[OFF(polygon_offset_line_enable)] = PolygonOffset; + table[OFF(polygon_offset_point_enable)] = PolygonOffset; + table[OFF(polygon_offset_factor)] = PolygonOffset; + table[OFF(polygon_offset_units)] = PolygonOffset; + table[OFF(polygon_offset_clamp)] = PolygonOffset; +} + +void SetupDirtyMultisampleControl(Tables& tables) { + FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl); +} + +void SetupDirtyRasterizeEnable(Tables& tables) { + tables[0][OFF(rasterize_enable)] = RasterizeEnable; +} + +void SetupDirtyFramebufferSRGB(Tables& tables) { + tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB; +} + +void SetupDirtyLogicOp(Tables& tables) { + FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp); +} + +void SetupDirtyFragmentClampColor(Tables& tables) { + tables[0][OFF(frag_color_clamp)] = FragmentClampColor; +} + +void SetupDirtyPointSize(Tables& tables) { + tables[0][OFF(vp_point_size)] = PointSize; + tables[0][OFF(point_size)] = PointSize; + tables[0][OFF(point_sprite_enable)] = PointSize; +} + +void SetupDirtyClipControl(Tables& tables) { + auto& table = tables[0]; + table[OFF(screen_y_control)] = ClipControl; + table[OFF(depth_mode)] = ClipControl; +} + +void SetupDirtyDepthClampEnabled(Tables& tables) { + tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled; +} + +void SetupDirtyMisc(Tables& tables) { + auto& table = tables[0]; + + table[OFF(clip_distance_enabled)] = ClipDistances; + + table[OFF(front_face)] = FrontFace; + + table[OFF(cull_test_enabled)] = CullTest; + table[OFF(cull_face)] = CullTest; +} + +} // Anonymous namespace + +StateTracker::StateTracker(Core::System& system) : system{system} {} + +void StateTracker::Initialize() { + auto& dirty = system.GPU().Maxwell3D().dirty; + auto& tables = dirty.tables; + SetupDirtyRenderTargets(tables); + SetupDirtyColorMasks(tables); + SetupDirtyViewports(tables); + SetupDirtyScissors(tables); + SetupDirtyVertexArrays(tables); + SetupDirtyVertexFormat(tables); + SetupDirtyShaders(tables); + SetupDirtyDepthTest(tables); + SetupDirtyStencilTest(tables); + SetupDirtyAlphaTest(tables); + SetupDirtyBlend(tables); + SetupDirtyPrimitiveRestart(tables); + SetupDirtyPolygonOffset(tables); + SetupDirtyMultisampleControl(tables); + SetupDirtyRasterizeEnable(tables); + SetupDirtyFramebufferSRGB(tables); + SetupDirtyLogicOp(tables); + SetupDirtyFragmentClampColor(tables); + SetupDirtyPointSize(tables); + SetupDirtyClipControl(tables); + SetupDirtyDepthClampEnabled(tables); + SetupDirtyMisc(tables); + + auto& store = dirty.on_write_stores; + SetupCommonOnWriteStores(store); + store[VertexBuffers] = true; + for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { + store[VertexBuffer0 + i] = true; + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h new file mode 100644 index 000000000..e08482911 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -0,0 +1,204 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <limits> + +#include <glad/glad.h> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/maxwell_3d.h" + +namespace Core { +class System; +} + +namespace OpenGL { + +namespace Dirty { + +enum : u8 { + First = VideoCommon::Dirty::LastCommonEntry, + + VertexFormats, + VertexFormat0, + VertexFormat31 = VertexFormat0 + 31, + + VertexBuffers, + VertexBuffer0, + VertexBuffer31 = VertexBuffer0 + 31, + + VertexInstances, + VertexInstance0, + VertexInstance31 = VertexInstance0 + 31, + + ViewportTransform, + Viewports, + Viewport0, + Viewport15 = Viewport0 + 15, + + Scissors, + Scissor0, + Scissor15 = Scissor0 + 15, + + ColorMaskCommon, + ColorMasks, + ColorMask0, + ColorMask7 = ColorMask0 + 7, + + BlendColor, + BlendIndependentEnabled, + BlendStates, + BlendState0, + BlendState7 = BlendState0 + 7, + + Shaders, + ClipDistances, + + ColorMask, + FrontFace, + CullTest, + DepthMask, + DepthTest, + StencilTest, + AlphaTest, + PrimitiveRestart, + PolygonOffset, + MultisampleControl, + RasterizeEnable, + FramebufferSRGB, + LogicOp, + FragmentClampColor, + PointSize, + ClipControl, + DepthClampEnabled, + + Last +}; +static_assert(Last <= std::numeric_limits<u8>::max()); + +} // namespace Dirty + +class StateTracker { +public: + explicit StateTracker(Core::System& system); + + void Initialize(); + + void BindIndexBuffer(GLuint new_index_buffer) { + if (index_buffer == new_index_buffer) { + return; + } + index_buffer = new_index_buffer; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); + } + + void NotifyScreenDrawVertexArray() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::VertexFormats] = true; + flags[OpenGL::Dirty::VertexFormat0 + 0] = true; + flags[OpenGL::Dirty::VertexFormat0 + 1] = true; + + flags[OpenGL::Dirty::VertexBuffers] = true; + flags[OpenGL::Dirty::VertexBuffer0] = true; + + flags[OpenGL::Dirty::VertexInstances] = true; + flags[OpenGL::Dirty::VertexInstance0 + 0] = true; + flags[OpenGL::Dirty::VertexInstance0 + 1] = true; + } + + void NotifyViewport0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::Viewports] = true; + flags[OpenGL::Dirty::Viewport0] = true; + } + + void NotifyScissor0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::Scissors] = true; + flags[OpenGL::Dirty::Scissor0] = true; + } + + void NotifyColorMask0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::ColorMasks] = true; + flags[OpenGL::Dirty::ColorMask0] = true; + } + + void NotifyBlend0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::BlendStates] = true; + flags[OpenGL::Dirty::BlendState0] = true; + } + + void NotifyFramebuffer() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[VideoCommon::Dirty::RenderTargets] = true; + } + + void NotifyFrontFace() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::FrontFace] = true; + } + + void NotifyCullTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::CullTest] = true; + } + + void NotifyDepthMask() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::DepthMask] = true; + } + + void NotifyDepthTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::DepthTest] = true; + } + + void NotifyStencilTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::StencilTest] = true; + } + + void NotifyPolygonOffset() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::PolygonOffset] = true; + } + + void NotifyRasterizeEnable() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::RasterizeEnable] = true; + } + + void NotifyFramebufferSRGB() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::FramebufferSRGB] = true; + } + + void NotifyLogicOp() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::LogicOp] = true; + } + + void NotifyClipControl() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::ClipControl] = true; + } + + void NotifyAlphaTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::AlphaTest] = true; + } + +private: + Core::System& system; + + GLuint index_buffer = 0; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 35ba334e4..6ec328c53 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -7,7 +7,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index cf934b0d8..2d3838a7a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -10,7 +10,7 @@ #include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/texture_cache/surface_base.h" @@ -397,6 +397,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p const bool is_proxy) : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { target = GetTextureTarget(params.target); + format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; if (!is_proxy) { texture_view = CreateTextureView(); } @@ -467,25 +468,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou } OGLTextureView CachedSurfaceView::CreateTextureView() const { - const auto& owner_params = surface.GetSurfaceParams(); OGLTextureView texture_view; texture_view.Create(); - const GLuint handle{texture_view.handle}; - const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)}; - - glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, + glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, params.num_levels, params.base_layer, params.num_layers); - - ApplyTextureDefaults(owner_params, handle); + ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); return texture_view; } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const Device& device) - : TextureCacheBase{system, rasterizer} { + const Device& device, StateTracker& state_tracker) + : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -519,25 +515,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) { const auto& src_params{src_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()}; + UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); + UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ - prev_state.AllDirty(); - prev_state.Apply(); - }); - - OpenGLState state; - state.draw.read_framebuffer = src_framebuffer.handle; - state.draw.draw_framebuffer = dst_framebuffer.handle; - state.framebuffer_srgb.enabled = dst_params.srgb_conversion; - state.AllDirty(); - state.Apply(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); - u32 buffers{}; + if (dst_params.srgb_conversion) { + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); + } + glDisable(GL_RASTERIZER_DISCARD); + glDisablei(GL_SCISSOR_TEST, 0); - UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); - UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); + glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); + GLenum buffers = 0; if (src_params.type == SurfaceType::ColorTexture) { src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8e13ab38b..6658c6ffd 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -27,6 +27,7 @@ using VideoCommon::ViewParams; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; +class StateTracker; using Surface = std::shared_ptr<CachedSurface>; using View = std::shared_ptr<CachedSurfaceView>; @@ -96,6 +97,10 @@ public: return texture_view.handle; } + GLenum GetFormat() const { + return format; + } + const SurfaceParams& GetSurfaceParams() const { return surface.GetSurfaceParams(); } @@ -113,6 +118,7 @@ private: CachedSurface& surface; GLenum target{}; + GLenum format{}; OGLTextureView texture_view; u32 swizzle{}; @@ -122,7 +128,7 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const Device& device); + const Device& device, StateTracker& state_tracker); ~TextureCacheOpenGL(); protected: @@ -139,6 +145,8 @@ protected: private: GLuint FetchPBO(std::size_t buffer_size); + StateTracker& state_tracker; + OGLFramebuffer src_framebuffer; OGLFramebuffer dst_framebuffer; std::unordered_map<u32, OGLBuffer> copy_pbo_cache; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index d3dea3659..494e38e7a 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -425,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) { return GL_KEEP; } -inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { +inline GLenum FrontFace(Maxwell::FrontFace front_face) { switch (front_face) { - case Maxwell::Cull::FrontFace::ClockWise: + case Maxwell::FrontFace::ClockWise: return GL_CW; - case Maxwell::Cull::FrontFace::CounterClockWise: + case Maxwell::FrontFace::CounterClockWise: return GL_CCW; } LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); return GL_CCW; } -inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { +inline GLenum CullFace(Maxwell::CullFace cull_face) { switch (cull_face) { - case Maxwell::Cull::CullFace::Front: + case Maxwell::CullFace::Front: return GL_FRONT; - case Maxwell::Cull::CullFace::Back: + case Maxwell::CullFace::Back: return GL_BACK; - case Maxwell::Cull::CullFace::FrontAndBack: + case Maxwell::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a4340b502..a51410660 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -20,6 +20,7 @@ #include "core/telemetry_session.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" namespace OpenGL { @@ -86,28 +87,22 @@ public: } void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { - OpenGLState prev_state = OpenGLState::GetCurState(); - OpenGLState state = OpenGLState::GetCurState(); - // Recreate the color texture attachment frame->color.Release(); frame->color.Create(); - state.renderbuffer = frame->color.handle; - state.Apply(); - glRenderbufferStorage(GL_RENDERBUFFER, frame->is_srgb ? GL_SRGB8 : GL_RGB8, width, height); + const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; + glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); // Recreate the FBO for the render target frame->render.Release(); frame->render.Create(); - state.draw.read_framebuffer = frame->render.handle; - state.draw.draw_framebuffer = frame->render.handle; - state.Apply(); + glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, frame->color.handle); if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); } - prev_state.Apply(); + frame->width = width; frame->height = height; frame->color_reloaded = true; @@ -164,9 +159,13 @@ public: namespace { -constexpr char vertex_shader[] = R"( +constexpr char VERTEX_SHADER[] = R"( #version 430 core +out gl_PerVertex { + vec4 gl_Position; +}; + layout (location = 0) in vec2 vert_position; layout (location = 1) in vec2 vert_tex_coord; layout (location = 0) out vec2 frag_tex_coord; @@ -187,7 +186,7 @@ void main() { } )"; -constexpr char fragment_shader[] = R"( +constexpr char FRAGMENT_SHADER[] = R"( #version 430 core layout (location = 0) in vec2 frag_tex_coord; @@ -196,7 +195,7 @@ layout (location = 0) out vec4 color; layout (binding = 0) uniform sampler2D color_texture; void main() { - color = texture(color_texture, frag_tex_coord); + color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); } )"; @@ -205,8 +204,8 @@ constexpr GLint TexCoordLocation = 1; constexpr GLint ModelViewMatrixLocation = 0; struct ScreenRectVertex { - constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) - : position{{x, y}}, tex_coord{{u, v}} {} + constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) + : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {} std::array<GLfloat, 2> position; std::array<GLfloat, 2> tex_coord; @@ -311,11 +310,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { return; } - // Maintain the rasterizer's state as a priority - OpenGLState prev_state = OpenGLState::GetCurState(); - state.AllDirty(); - state.Apply(); - PrepareRendertarget(framebuffer); RenderScreenshot(); @@ -358,8 +352,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { frame->is_srgb = screen_info.display_srgb; frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); } - state.draw.draw_framebuffer = frame->render.handle; - state.Apply(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle); DrawScreen(layout); // Create a fence for the frontend to wait on and swap this frame to OffTex frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); @@ -368,10 +361,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { m_current_frame++; rasterizer->TickFrame(); } - - // Restore the rasterizer state - prev_state.AllDirty(); - prev_state.Apply(); } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { @@ -442,31 +431,25 @@ void RendererOpenGL::InitOpenGLObjects() { glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); - // Link shaders and get variable locations - shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); - state.draw.shader_program = shader.handle; - state.AllDirty(); - state.Apply(); + // Create shader programs + OGLShader vertex_shader; + vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER); + + OGLShader fragment_shader; + fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER); + + vertex_program.Create(true, false, vertex_shader.handle); + fragment_program.Create(true, false, fragment_shader.handle); + + // Create program pipeline + program_manager.Create(); + glBindProgramPipeline(program_manager.GetHandle()); // Generate VBO handle for drawing vertex_buffer.Create(); - // Generate VAO - vertex_array.Create(); - state.draw.vertex_array = vertex_array.handle; - // Attach vertex data to VAO glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, position)); - glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, tex_coord)); - glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0); - glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0); - glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation); - glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation); - glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0, - sizeof(ScreenRectVertex)); // Allocate textures for the screen screen_info.texture.resource.Create(GL_TEXTURE_2D); @@ -499,7 +482,8 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); + rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, + program_manager, state_tracker); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -538,8 +522,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); } -void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, - float h) { +void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { + if (renderer_settings.set_background_color) { + // Update background color before drawing + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, + 0.0f); + } + + // Set projection matrix + const std::array ortho_matrix = + MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); + glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, + std::data(ortho_matrix)); + const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; auto right = texcoords.right; @@ -571,46 +566,77 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, static_cast<f32>(screen_info.texture.height); } + const auto& screen = layout.screen; const std::array vertices = { - ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v), - ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v), - ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v), - ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), + ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v), + ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v), + ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v), + ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v), }; - - state.textures[0] = screen_info.display_texture; - state.framebuffer_srgb.enabled = screen_info.display_srgb; - state.AllDirty(); - state.Apply(); glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - // Restore default state - state.framebuffer_srgb.enabled = false; - state.textures[0] = 0; - state.AllDirty(); - state.Apply(); -} -void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { - if (renderer_settings.set_background_color) { - // Update background color before drawing - glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, - 0.0f); + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask0(); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + + program_manager.UseVertexShader(vertex_program.handle); + program_manager.UseGeometryShader(0); + program_manager.UseFragmentShader(fragment_program.handle); + program_manager.Update(); + + glEnable(GL_CULL_FACE); + if (screen_info.display_srgb) { + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); } + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glDisablei(GL_SCISSOR_TEST, 0); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), + static_cast<GLfloat>(layout.height)); + glDepthRangeIndexed(0, 0.0, 0.0); + + glEnableVertexAttribArray(PositionLocation); + glEnableVertexAttribArray(TexCoordLocation); + glVertexAttribDivisor(PositionLocation, 0); + glVertexAttribDivisor(TexCoordLocation, 0); + glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, position)); + glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, tex_coord)); + glVertexAttribBinding(PositionLocation, 0); + glVertexAttribBinding(TexCoordLocation, 0); + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + + glBindTextureUnit(0, screen_info.display_texture); + glBindSampler(0, 0); - const auto& screen = layout.screen; - - glViewport(0, 0, layout.width, layout.height); glClear(GL_COLOR_BUFFER_BIT); - - // Set projection matrix - const std::array ortho_matrix = - MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); - - DrawScreenTriangles(screen_info, static_cast<float>(screen.left), - static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), - static_cast<float>(screen.GetHeight())); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } void RendererOpenGL::TryPresent(int timeout_ms) { @@ -653,13 +679,14 @@ void RendererOpenGL::RenderScreenshot() { return; } + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + // Draw the current frame to the screenshot framebuffer screenshot_framebuffer.Create(); - GLuint old_read_fb = state.draw.read_framebuffer; - GLuint old_draw_fb = state.draw.draw_framebuffer; - state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; - state.AllDirty(); - state.Apply(); + glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle); Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; @@ -676,12 +703,11 @@ void RendererOpenGL::RenderScreenshot() { renderer_settings.screenshot_bits); screenshot_framebuffer.Release(); - state.draw.read_framebuffer = old_read_fb; - state.draw.draw_framebuffer = old_draw_fb; - state.AllDirty(); - state.Apply(); glDeleteRenderbuffers(1, &renderbuffer); + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + renderer_settings.screenshot_complete_callback(); renderer_settings.screenshot_requested = false; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index d45e69cbc..33073ce5b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,7 +10,8 @@ #include "common/math_util.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { class System; @@ -76,8 +77,6 @@ private: /// Draws the emulated screens to the emulator window. void DrawScreen(const Layout::FramebufferLayout& layout); - void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); - void RenderScreenshot(); /// Loads framebuffer from emulated memory into the active OpenGL texture. @@ -93,17 +92,20 @@ private: Core::Frontend::EmuWindow& emu_window; Core::System& system; - OpenGLState state; + StateTracker state_tracker{system}; // OpenGL object IDs - OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLProgram shader; + OGLProgram vertex_program; + OGLProgram fragment_program; OGLFramebuffer screenshot_framebuffer; /// Display information for Switch screen ScreenInfo screen_info; + /// Global dummy shader pipeline + GLShader::ProgramManager program_manager; + /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index ac99e6385..b751086fa 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -9,6 +9,7 @@ #include <glad/glad.h> #include "common/common_types.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { @@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry { GLsizei stride{}; }; -VertexArrayPushBuffer::VertexArrayPushBuffer() = default; +VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker) + : state_tracker{state_tracker} {} VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; -void VertexArrayPushBuffer::Setup(GLuint vao_) { - vao = vao_; +void VertexArrayPushBuffer::Setup() { index_buffer = nullptr; vertex_buffers.clear(); } @@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* void VertexArrayPushBuffer::Bind() { if (index_buffer) { - glVertexArrayElementBuffer(vao, *index_buffer); + state_tracker.BindIndexBuffer(*index_buffer); } - // TODO(Rodrigo): Find a way to ARB_multi_bind this for (const auto& entry : vertex_buffers) { - glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, - entry.stride); + glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride); } } diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 3ad7c02d4..47ee3177b 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,12 +11,14 @@ namespace OpenGL { +class StateTracker; + class VertexArrayPushBuffer final { public: - explicit VertexArrayPushBuffer(); + explicit VertexArrayPushBuffer(StateTracker& state_tracker); ~VertexArrayPushBuffer(); - void Setup(GLuint vao_); + void Setup(); void SetIndexBuffer(const GLuint* buffer); @@ -28,7 +30,8 @@ public: private: struct Entry; - GLuint vao{}; + StateTracker& state_tracker; + const GLuint* index_buffer{}; std::vector<Entry> vertex_buffers; }; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 4e3ff231e..2bb376555 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -112,19 +112,18 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) const auto& clip = regs.view_volume_clip_control; const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; - Maxwell::Cull::FrontFace front_face = regs.cull.front_face; + Maxwell::FrontFace front_face = regs.front_face; if (regs.screen_y_control.triangle_rast_flip != 0 && regs.viewport_transform[0].scale_y > 0.0f) { - if (front_face == Maxwell::Cull::FrontFace::CounterClockWise) - front_face = Maxwell::Cull::FrontFace::ClockWise; - else if (front_face == Maxwell::Cull::FrontFace::ClockWise) - front_face = Maxwell::Cull::FrontFace::CounterClockWise; + if (front_face == Maxwell::FrontFace::CounterClockWise) + front_face = Maxwell::FrontFace::ClockWise; + else if (front_face == Maxwell::FrontFace::ClockWise) + front_face = Maxwell::FrontFace::CounterClockWise; } const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - return FixedPipelineState::Rasterizer(regs.cull.enabled, depth_bias_enabled, - depth_clamp_enabled, gl_ndc, regs.cull.cull_face, - front_face); + return FixedPipelineState::Rasterizer(regs.cull_test_enabled, depth_bias_enabled, + depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); } } // Anonymous namespace diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 87056ef37..4c8ba7f90 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -171,8 +171,8 @@ struct FixedPipelineState { struct Rasterizer { constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, - bool ndc_minus_one_to_one, Maxwell::Cull::CullFace cull_face, - Maxwell::Cull::FrontFace front_face) + bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, + Maxwell::FrontFace front_face) : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, cull_face{cull_face}, front_face{front_face} {} @@ -182,8 +182,8 @@ struct FixedPipelineState { bool depth_bias_enable; bool depth_clamp_enable; bool ndc_minus_one_to_one; - Maxwell::Cull::CullFace cull_face; - Maxwell::Cull::FrontFace front_face; + Maxwell::CullFace cull_face; + Maxwell::FrontFace front_face; std::size_t Hash() const noexcept; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 948d67d89..df3ac707c 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -586,24 +586,24 @@ vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor) { return {}; } -vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face) { +vk::FrontFace FrontFace(Maxwell::FrontFace front_face) { switch (front_face) { - case Maxwell::Cull::FrontFace::ClockWise: + case Maxwell::FrontFace::ClockWise: return vk::FrontFace::eClockwise; - case Maxwell::Cull::FrontFace::CounterClockWise: + case Maxwell::FrontFace::CounterClockWise: return vk::FrontFace::eCounterClockwise; } UNIMPLEMENTED_MSG("Unimplemented front face={}", static_cast<u32>(front_face)); return {}; } -vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face) { +vk::CullModeFlags CullFace(Maxwell::CullFace cull_face) { switch (cull_face) { - case Maxwell::Cull::CullFace::Front: + case Maxwell::CullFace::Front: return vk::CullModeFlagBits::eFront; - case Maxwell::Cull::CullFace::Back: + case Maxwell::CullFace::Back: return vk::CullModeFlagBits::eBack; - case Maxwell::Cull::CullFace::FrontAndBack: + case Maxwell::CullFace::FrontAndBack: return vk::CullModeFlagBits::eFrontAndBack; } UNIMPLEMENTED_MSG("Unimplemented cull face={}", static_cast<u32>(cull_face)); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7e9678b7b..24f6ab544 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -54,9 +54,9 @@ vk::BlendOp BlendEquation(Maxwell::Blend::Equation equation); vk::BlendFactor BlendFactor(Maxwell::Blend::Factor factor); -vk::FrontFace FrontFace(Maxwell::Cull::FrontFace front_face); +vk::FrontFace FrontFace(Maxwell::FrontFace front_face); -vk::CullModeFlags CullFace(Maxwell::Cull::CullFace cull_face); +vk::CullModeFlags CullFace(Maxwell::CullFace cull_face); vk::ComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index ddc62bc97..42bb01418 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -27,6 +27,7 @@ #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" namespace Vulkan { @@ -177,10 +178,13 @@ bool RendererVulkan::Init() { swapchain = std::make_unique<VKSwapchain>(surface, *device); swapchain->Create(framebuffer.width, framebuffer.height, false); - scheduler = std::make_unique<VKScheduler>(*device, *resource_manager); + state_tracker = std::make_unique<StateTracker>(system); + + scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker); rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, - *resource_manager, *memory_manager, *scheduler); + *resource_manager, *memory_manager, + *state_tracker, *scheduler); blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, *resource_manager, *memory_manager, *swapchain, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index f513397f0..3da08d2e4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -4,8 +4,10 @@ #pragma once +#include <memory> #include <optional> #include <vector> + #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/declarations.h" @@ -15,6 +17,7 @@ class System; namespace Vulkan { +class StateTracker; class VKBlitScreen; class VKDevice; class VKFence; @@ -61,6 +64,7 @@ private: std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKMemoryManager> memory_manager; std::unique_ptr<VKResourceManager> resource_manager; + std::unique_ptr<StateTracker> state_tracker; std::unique_ptr<VKScheduler> scheduler; std::unique_ptr<VKBlitScreen> blit_screen; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 696e4b291..144e1e007 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -188,11 +188,6 @@ VKPipelineCache::~VKPipelineCache() = default; std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { const auto& gpu = system.GPU().Maxwell3D(); - auto& dirty = system.GPU().Maxwell3D().dirty.shaders; - if (!dirty) { - return last_shaders; - } - dirty = false; std::array<Shader, Maxwell::MaxShaderProgram> shaders; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3fe28c204..b402fb268 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -36,6 +36,7 @@ #include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" @@ -280,10 +281,11 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, VKScreenInfo& screen_info, const VKDevice& device, VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, VKScheduler& scheduler) + VKMemoryManager& memory_manager, StateTracker& state_tracker, + VKScheduler& scheduler) : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, screen_info{screen_info}, device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, scheduler{scheduler}, + memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, staging_pool(device, memory_manager, scheduler), descriptor_pool(device), update_descriptor_queue(device, scheduler), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), @@ -548,6 +550,10 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } +void RasterizerVulkan::SetupDirtyFlags() { + state_tracker.Initialize(); +} + void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; @@ -571,9 +577,9 @@ void RasterizerVulkan::FlushWork() { RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& dirty = system.GPU().Maxwell3D().dirty; - const bool update_rendertargets = dirty.render_settings; - dirty.render_settings = false; + auto& dirty = system.GPU().Maxwell3D().dirty.flags; + const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; + dirty[VideoCommon::Dirty::RenderTargets] = false; texture_cache.GuardRenderTargets(true); @@ -723,13 +729,13 @@ void RasterizerVulkan::SetupImageTransitions( } void RasterizerVulkan::UpdateDynamicStates() { - auto& gpu = system.GPU().Maxwell3D(); - UpdateViewportsState(gpu); - UpdateScissorsState(gpu); - UpdateDepthBias(gpu); - UpdateBlendConstants(gpu); - UpdateDepthBounds(gpu); - UpdateStencilFaces(gpu); + auto& regs = system.GPU().Maxwell3D().regs; + UpdateViewportsState(regs); + UpdateScissorsState(regs); + UpdateDepthBias(regs); + UpdateBlendConstants(regs); + UpdateDepthBounds(regs); + UpdateStencilFaces(regs); } void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, @@ -979,12 +985,10 @@ void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const Ima image_views.push_back(ImageView{std::move(view), image_layout}); } -void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { - if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { +void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchViewports()) { return; } - gpu.dirty.viewport_transform = false; - const auto& regs = gpu.regs; const std::array viewports{ GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), @@ -999,12 +1003,10 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { }); } -void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { - if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { +void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchScissors()) { return; } - gpu.dirty.scissor_test = false; - const auto& regs = gpu.regs; const std::array scissors = { GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), @@ -1017,46 +1019,39 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { }); } -void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { - if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { +void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthBias()) { return; } - gpu.dirty.polygon_offset = false; - const auto& regs = gpu.regs; scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); }); } -void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { - if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { +void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchBlendConstants()) { return; } - gpu.dirty.blend_state = false; - const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, - gpu.regs.blend_color.b, gpu.regs.blend_color.a}; + const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, + regs.blend_color.a}; scheduler.Record([blend_color](auto cmdbuf, auto& dld) { cmdbuf.setBlendConstants(blend_color.data(), dld); }); } -void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { - if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { +void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchDepthBounds()) { return; } - gpu.dirty.depth_bounds_values = false; - const auto& regs = gpu.regs; scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); } -void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { - if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { +void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) { + if (!state_tracker.TouchStencilProperties()) { return; } - gpu.dirty.stencil_test = false; - const auto& regs = gpu.regs; if (regs.stencil_two_side_enable) { // Separate values per face scheduler.Record( diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4dc8af6e8..96ea05f0a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -96,6 +96,7 @@ struct hash<Vulkan::FramebufferCacheKey> { namespace Vulkan { +class StateTracker; class BufferBindings; struct ImageView { @@ -108,7 +109,7 @@ public: explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, VKScreenInfo& screen_info, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKScheduler& scheduler); + StateTracker& state_tracker, VKScheduler& scheduler); ~RasterizerVulkan() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -127,6 +128,7 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; + void SetupDirtyFlags() override; /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; @@ -215,12 +217,12 @@ private: void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); - void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); - void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); - void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); - void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); - void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); - void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); + void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs); + void UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs); std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; @@ -241,6 +243,7 @@ private: const VKDevice& device; VKResourceManager& resource_manager; VKMemoryManager& memory_manager; + StateTracker& state_tracker; VKScheduler& scheduler; VKStagingBufferPool staging_pool; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 92bd6c344..b61d4fe63 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -2,6 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <memory> +#include <mutex> +#include <optional> +#include <thread> +#include <utility> + #include "common/assert.h" #include "common/microprofile.h" #include "video_core/renderer_vulkan/declarations.h" @@ -9,6 +15,7 @@ #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { @@ -29,9 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf, last = nullptr; } -VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager) - : device{device}, resource_manager{resource_manager}, next_fence{ - &resource_manager.CommitFence()} { +VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, + StateTracker& state_tracker) + : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker}, + next_fence{&resource_manager.CommitFence()} { AcquireNewChunk(); AllocateNewContext(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); @@ -157,12 +165,7 @@ void VKScheduler::AllocateNewContext() { void VKScheduler::InvalidateState() { state.graphics_pipeline = nullptr; - state.viewports = false; - state.scissors = false; - state.depth_bias = false; - state.blend_constants = false; - state.depth_bounds = false; - state.stencil_values = false; + state_tracker.InvalidateCommandBufferState(); } void VKScheduler::EndPendingOperations() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 62fd7858b..c7cc291c3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -17,6 +17,7 @@ namespace Vulkan { +class StateTracker; class VKDevice; class VKFence; class VKQueryCache; @@ -43,7 +44,8 @@ private: /// OpenGL-like operations on Vulkan command buffers. class VKScheduler { public: - explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager); + explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, + StateTracker& state_tracker); ~VKScheduler(); /// Sends the current execution context to the GPU. @@ -74,36 +76,6 @@ public: query_cache = &query_cache_; } - /// Returns true when viewports have been set in the current command buffer. - bool TouchViewports() { - return std::exchange(state.viewports, true); - } - - /// Returns true when scissors have been set in the current command buffer. - bool TouchScissors() { - return std::exchange(state.scissors, true); - } - - /// Returns true when depth bias have been set in the current command buffer. - bool TouchDepthBias() { - return std::exchange(state.depth_bias, true); - } - - /// Returns true when blend constants have been set in the current command buffer. - bool TouchBlendConstants() { - return std::exchange(state.blend_constants, true); - } - - /// Returns true when depth bounds have been set in the current command buffer. - bool TouchDepthBounds() { - return std::exchange(state.depth_bounds, true); - } - - /// Returns true when stencil values have been set in the current command buffer. - bool TouchStencilValues() { - return std::exchange(state.stencil_values, true); - } - /// Send work to a separate thread. template <typename T> void Record(T&& command) { @@ -217,6 +189,8 @@ private: const VKDevice& device; VKResourceManager& resource_manager; + StateTracker& state_tracker; + VKQueryCache* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; @@ -226,12 +200,6 @@ private: struct State { std::optional<vk::RenderPassBeginInfo> renderpass; vk::Pipeline graphics_pipeline; - bool viewports = false; - bool scissors = false; - bool depth_bias = false; - bool blend_constants = false; - bool depth_bounds = false; - bool stencil_values = false; } state; std::unique_ptr<CommandChunk> chunk; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp new file mode 100644 index 000000000..d74e68b63 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -0,0 +1,101 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <cstddef> +#include <iterator> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" + +#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) + +namespace Vulkan { + +namespace { + +using namespace Dirty; +using namespace VideoCommon::Dirty; +using Tegra::Engines::Maxwell3D; +using Regs = Maxwell3D::Regs; +using Tables = Maxwell3D::DirtyState::Tables; +using Table = Maxwell3D::DirtyState::Table; +using Flags = Maxwell3D::DirtyState::Flags; + +Flags MakeInvalidationFlags() { + Flags flags{}; + flags[Viewports] = true; + flags[Scissors] = true; + flags[DepthBias] = true; + flags[BlendConstants] = true; + flags[DepthBounds] = true; + flags[StencilProperties] = true; + return flags; +} + +void SetupDirtyViewports(Tables& tables) { + FillBlock(tables[0], OFF(viewport_transform), NUM(viewport_transform), Viewports); + FillBlock(tables[0], OFF(viewports), NUM(viewports), Viewports); + tables[0][OFF(viewport_transform_enabled)] = Viewports; +} + +void SetupDirtyScissors(Tables& tables) { + FillBlock(tables[0], OFF(scissor_test), NUM(scissor_test), Scissors); +} + +void SetupDirtyDepthBias(Tables& tables) { + auto& table = tables[0]; + table[OFF(polygon_offset_units)] = DepthBias; + table[OFF(polygon_offset_clamp)] = DepthBias; + table[OFF(polygon_offset_factor)] = DepthBias; +} + +void SetupDirtyBlendConstants(Tables& tables) { + FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendConstants); +} + +void SetupDirtyDepthBounds(Tables& tables) { + FillBlock(tables[0], OFF(depth_bounds), NUM(depth_bounds), DepthBounds); +} + +void SetupDirtyStencilProperties(Tables& tables) { + auto& table = tables[0]; + table[OFF(stencil_two_side_enable)] = StencilProperties; + table[OFF(stencil_front_func_ref)] = StencilProperties; + table[OFF(stencil_front_mask)] = StencilProperties; + table[OFF(stencil_front_func_mask)] = StencilProperties; + table[OFF(stencil_back_func_ref)] = StencilProperties; + table[OFF(stencil_back_mask)] = StencilProperties; + table[OFF(stencil_back_func_mask)] = StencilProperties; +} + +} // Anonymous namespace + +StateTracker::StateTracker(Core::System& system) + : system{system}, invalidation_flags{MakeInvalidationFlags()} {} + +void StateTracker::Initialize() { + auto& dirty = system.GPU().Maxwell3D().dirty; + auto& tables = dirty.tables; + SetupDirtyRenderTargets(tables); + SetupDirtyViewports(tables); + SetupDirtyScissors(tables); + SetupDirtyDepthBias(tables); + SetupDirtyBlendConstants(tables); + SetupDirtyDepthBounds(tables); + SetupDirtyStencilProperties(tables); + + SetupCommonOnWriteStores(dirty.on_write_stores); +} + +void StateTracker::InvalidateCommandBufferState() { + system.GPU().Maxwell3D().dirty.flags |= invalidation_flags; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h new file mode 100644 index 000000000..03bc415b2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -0,0 +1,79 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <limits> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/maxwell_3d.h" + +namespace Vulkan { + +namespace Dirty { + +enum : u8 { + First = VideoCommon::Dirty::LastCommonEntry, + + Viewports, + Scissors, + DepthBias, + BlendConstants, + DepthBounds, + StencilProperties, + + Last +}; +static_assert(Last <= std::numeric_limits<u8>::max()); + +} // namespace Dirty + +class StateTracker { +public: + explicit StateTracker(Core::System& system); + + void Initialize(); + + void InvalidateCommandBufferState(); + + bool TouchViewports() { + return Exchange(Dirty::Viewports, false); + } + + bool TouchScissors() { + return Exchange(Dirty::Scissors, false); + } + + bool TouchDepthBias() { + return Exchange(Dirty::DepthBias, false); + } + + bool TouchBlendConstants() { + return Exchange(Dirty::BlendConstants, false); + } + + bool TouchDepthBounds() { + return Exchange(Dirty::DepthBounds, false); + } + + bool TouchStencilProperties() { + return Exchange(Dirty::StencilProperties, false); + } + +private: + bool Exchange(std::size_t id, bool new_value) const noexcept { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + const bool is_dirty = flags[id]; + flags[id] = new_value; + return is_dirty; + } + + Core::System& system; + Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 51b0d38a6..73d92a5ae 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -22,6 +22,7 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/surface.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c70e4aec2..51373b687 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "core/core.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" @@ -142,11 +143,10 @@ public: TView GetDepthBufferSurface(bool preserve_contents) { std::lock_guard lock{mutex}; auto& maxwell3d = system.GPU().Maxwell3D(); - - if (!maxwell3d.dirty.depth_buffer) { + if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { return depth_buffer.view; } - maxwell3d.dirty.depth_buffer = false; + maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; @@ -175,10 +175,10 @@ public: std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.render_target[index]) { + if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { return render_targets[index].view; } - maxwell3d.dirty.render_target[index] = false; + maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = false; const auto& regs{maxwell3d.regs}; if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || @@ -320,14 +320,14 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { - auto& maxwell3d = system.GPU().Maxwell3D(); + auto& dirty = system.GPU().Maxwell3D().dirty; const u32 index = surface->GetRenderTarget(); if (index == DEPTH_RT) { - maxwell3d.dirty.depth_buffer = true; + dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; } else { - maxwell3d.dirty.render_target[index] = true; + dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index] = true; } - maxwell3d.dirty.render_settings = true; + dirty.flags[VideoCommon::Dirty::RenderTargets] = true; } void Register(TSurface surface) { diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 07098c70d..7edc4abe1 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -337,20 +337,22 @@ struct TSCEntry { }; float GetMaxAnisotropy() const { - switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { - case Anisotropy::Default: - return static_cast<float>(1U << max_anisotropy); - case Anisotropy::Filter2x: - return static_cast<float>(2U << max_anisotropy); - case Anisotropy::Filter4x: - return static_cast<float>(4U << max_anisotropy); - case Anisotropy::Filter8x: - return static_cast<float>(8U << max_anisotropy); - case Anisotropy::Filter16x: - return static_cast<float>(16U << max_anisotropy); - default: - return static_cast<float>(1U << max_anisotropy); - } + const u32 min_value = [] { + switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { + default: + case Anisotropy::Default: + return 1U; + case Anisotropy::Filter2x: + return 2U; + case Anisotropy::Filter4x: + return 4U; + case Anisotropy::Filter8x: + return 8U; + case Anisotropy::Filter16x: + return 16U; + } + }(); + return static_cast<float>(std::max(1U << max_anisotropy, min_value)); } float GetMinLod() const { |