diff options
Diffstat (limited to 'src')
18 files changed, 2905 insertions, 101 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2f946e7be..e56253c4c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -11,13 +11,24 @@ add_library(video_core STATIC gpu.h memory_manager.cpp memory_manager.h + rasterizer_interface.h renderer_base.cpp renderer_base.h + renderer_opengl/gl_rasterizer.cpp + renderer_opengl/gl_rasterizer.h + renderer_opengl/gl_rasterizer_cache.cpp + renderer_opengl/gl_rasterizer_cache.h renderer_opengl/gl_resource_manager.h + renderer_opengl/gl_shader_decompiler.cpp + renderer_opengl/gl_shader_decompiler.h + renderer_opengl/gl_shader_gen.cpp + renderer_opengl/gl_shader_gen.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state.cpp renderer_opengl/gl_state.h + renderer_opengl/gl_stream_buffer.cpp + renderer_opengl/gl_stream_buffer.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h utils.h diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h new file mode 100644 index 000000000..6c7bd0826 --- /dev/null +++ b/src/video_core/rasterizer_interface.h @@ -0,0 +1,61 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "common/common_types.h" + +struct ScreenInfo; + +namespace VideoCore { + +class RasterizerInterface { +public: + virtual ~RasterizerInterface() {} + + /// Draw the current batch of triangles + virtual void DrawTriangles() = 0; + + /// Notify rasterizer that the specified Maxwell register has been changed + virtual void NotifyMaxwellRegisterChanged(u32 id) = 0; + + /// Notify rasterizer that all caches should be flushed to 3DS memory + virtual void FlushAll() = 0; + + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory + virtual void FlushRegion(PAddr addr, u32 size) = 0; + + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(PAddr addr, u32 size) = 0; + + /// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory + /// and invalidated + virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; + + /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 + virtual bool AccelerateDisplayTransfer(const void* config) { + return false; + } + + /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1 + virtual bool AccelerateTextureCopy(const void* config) { + return false; + } + + /// Attempt to use a faster method to fill a region + virtual bool AccelerateFill(const void* config) { + return false; + } + + /// Attempt to use a faster method to display the framebuffer to screen + virtual bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) { + return false; + } + + virtual bool AccelerateDrawBatch(bool is_indexed) { + return false; + } +}; +} // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp new file mode 100644 index 000000000..24cfff229 --- /dev/null +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -0,0 +1,269 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <string> +#include <tuple> +#include <utility> +#include <glad/glad.h> +#include "common/alignment.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "common/scope_exit.h" +#include "common/vector_math.h" +#include "core/settings.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/renderer_opengl.h" + +using PixelFormat = SurfaceParams::PixelFormat; +using SurfaceType = SurfaceParams::SurfaceType; + +MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_FS, "OpenGL", "Fragment Shader Setup", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); +MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255)); +MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); + +enum class UniformBindings : GLuint { Common, VS, FS }; + +static void SetShaderUniformBlockBinding(GLuint shader, const char* name, UniformBindings binding, + size_t expected_size) { + GLuint ub_index = glGetUniformBlockIndex(shader, name); + if (ub_index != GL_INVALID_INDEX) { + GLint ub_size = 0; + glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); + ASSERT_MSG(ub_size == expected_size, + "Uniform block size did not match! 
Got %d, expected %zu", + static_cast<int>(ub_size), expected_size); + glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); + } +} + +static void SetShaderUniformBlockBindings(GLuint shader) { + SetShaderUniformBlockBinding(shader, "shader_data", UniformBindings::Common, + sizeof(RasterizerOpenGL::UniformData)); + SetShaderUniformBlockBinding(shader, "vs_config", UniformBindings::VS, + sizeof(RasterizerOpenGL::VSUniformData)); + SetShaderUniformBlockBinding(shader, "fs_config", UniformBindings::FS, + sizeof(RasterizerOpenGL::FSUniformData)); +} + +RasterizerOpenGL::RasterizerOpenGL() { + has_ARB_buffer_storage = false; + has_ARB_direct_state_access = false; + has_ARB_separate_shader_objects = false; + has_ARB_vertex_attrib_binding = false; + + GLint ext_num; + glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num); + for (GLint i = 0; i < ext_num; i++) { + std::string extension{reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))}; + + if (extension == "GL_ARB_buffer_storage") { + has_ARB_buffer_storage = true; + } else if (extension == "GL_ARB_direct_state_access") { + has_ARB_direct_state_access = true; + } else if (extension == "GL_ARB_separate_shader_objects") { + has_ARB_separate_shader_objects = true; + } else if (extension == "GL_ARB_vertex_attrib_binding") { + has_ARB_vertex_attrib_binding = true; + } + } + + // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 + state.clip_distance[0] = true; + + // Generate VBO, VAO and UBO + vertex_buffer = OGLStreamBuffer::MakeBuffer(GLAD_GL_ARB_buffer_storage, GL_ARRAY_BUFFER); + vertex_buffer->Create(VERTEX_BUFFER_SIZE, VERTEX_BUFFER_SIZE / 2); + sw_vao.Create(); + uniform_buffer.Create(); + + state.draw.vertex_array = sw_vao.handle; + state.draw.vertex_buffer = vertex_buffer->GetHandle(); + state.draw.uniform_buffer = uniform_buffer.handle; + state.Apply(); + + glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), nullptr, GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, 
uniform_buffer.handle); + + uniform_block_data.dirty = true; + + // Create render framebuffer + framebuffer.Create(); + + if (has_ARB_separate_shader_objects) { + hw_vao.Create(); + hw_vao_enabled_attributes.fill(false); + + stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER); + stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2); + state.draw.vertex_buffer = stream_buffer->GetHandle(); + + pipeline.Create(); + vs_input_index_min = 0; + vs_input_index_max = 0; + state.draw.program_pipeline = pipeline.handle; + state.draw.shader_program = 0; + state.draw.vertex_array = hw_vao.handle; + state.Apply(); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle()); + + vs_uniform_buffer.Create(); + glBindBuffer(GL_UNIFORM_BUFFER, vs_uniform_buffer.handle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); + glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); + } else { + UNIMPLEMENTED(); + } + + accelerate_draw = AccelDraw::Disabled; + + glEnable(GL_BLEND); + + // Sync fixed function OpenGL state + SyncClipEnabled(); + SyncClipCoef(); + SyncCullMode(); + SyncBlendEnabled(); + SyncBlendFuncs(); + SyncBlendColor(); +} + +RasterizerOpenGL::~RasterizerOpenGL() { + if (stream_buffer != nullptr) { + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + stream_buffer->Release(); + } +} + +static constexpr std::array<GLenum, 4> vs_attrib_types{ + GL_BYTE, // VertexAttributeFormat::BYTE + GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE + GL_SHORT, // VertexAttributeFormat::SHORT + GL_FLOAT // VertexAttributeFormat::FLOAT +}; + +void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { + MICROPROFILE_SCOPE(OpenGL_VAO); + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { + 
MICROPROFILE_SCOPE(OpenGL_VS); + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { + MICROPROFILE_SCOPE(OpenGL_FS); + UNIMPLEMENTED(); +} + +bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { + if (!has_ARB_separate_shader_objects) { + UNIMPLEMENTED(); + return false; + } + + accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; + DrawTriangles(); + + return true; +} + +void RasterizerOpenGL::DrawTriangles() { + MICROPROFILE_SCOPE(OpenGL_Drawing); + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} + +void RasterizerOpenGL::FlushAll() { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushAll(); +} + +void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); +} + +void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.InvalidateRegion(addr, size, nullptr); +} + +void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { + MICROPROFILE_SCOPE(OpenGL_CacheManagement); + res_cache.FlushRegion(addr, size); + res_cache.InvalidateRegion(addr, size, nullptr); +} + +bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { + MICROPROFILE_SCOPE(OpenGL_Blits); + UNIMPLEMENTED(); + return true; +} + +bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { + UNIMPLEMENTED(); + return true; +} + +bool RasterizerOpenGL::AccelerateFill(const void* config) { + UNIMPLEMENTED(); + return true; +} + +bool RasterizerOpenGL::AccelerateDisplay(const void* config, PAddr framebuffer_addr, + u32 pixel_stride, ScreenInfo& screen_info) { + UNIMPLEMENTED(); + return true; +} + +void RasterizerOpenGL::SetShader() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncClipEnabled() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncClipCoef() { + UNIMPLEMENTED(); +} + 
+void RasterizerOpenGL::SyncCullMode() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncDepthScale() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncDepthOffset() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncBlendEnabled() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncBlendFuncs() { + UNIMPLEMENTED(); +} + +void RasterizerOpenGL::SyncBlendColor() { + UNIMPLEMENTED(); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h new file mode 100644 index 000000000..893fc530f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -0,0 +1,162 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstddef> +#include <cstring> +#include <memory> +#include <unordered_map> +#include <vector> +#include <glad/glad.h> +#include "common/bit_field.h" +#include "common/common_types.h" +#include "common/hash.h" +#include "common/vector_math.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" + +struct ScreenInfo; + +class RasterizerOpenGL : public VideoCore::RasterizerInterface { +public: + RasterizerOpenGL(); + ~RasterizerOpenGL() override; + + void DrawTriangles() override; + void NotifyMaxwellRegisterChanged(u32 id) override; + void FlushAll() override; + void FlushRegion(PAddr addr, u32 size) override; + void InvalidateRegion(PAddr addr, u32 size) override; + void FlushAndInvalidateRegion(PAddr addr, u32 size) override; + bool AccelerateDisplayTransfer(const void* config) override; + bool AccelerateTextureCopy(const void* config) override; + bool AccelerateFill(const void* config) 
override; + bool AccelerateDisplay(const void* config, PAddr framebuffer_addr, u32 pixel_stride, + ScreenInfo& screen_info) override; + bool AccelerateDrawBatch(bool is_indexed) override; + + struct VertexShader { + OGLShader shader; + }; + + struct FragmentShader { + OGLShader shader; + }; + + /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned + // NOTE: Always keep a vec4 at the end. The GL spec is not clear wether the alignment at + // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. + // Not following that rule will cause problems on some AMD drivers. + struct UniformData {}; + + // static_assert( + // sizeof(UniformData) == 0x460, + // "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 16384, + "UniformData structure must be less than 16kb as per the OpenGL spec"); + + struct VSUniformData {}; + // static_assert( + // sizeof(VSUniformData) == 1856, + // "The size of the VSUniformData structure has changed, update the structure in the + // shader"); + static_assert(sizeof(VSUniformData) < 16384, + "VSUniformData structure must be less than 16kb as per the OpenGL spec"); + + struct FSUniformData {}; + // static_assert( + // sizeof(FSUniformData) == 1856, + // "The size of the FSUniformData structure has changed, update the structure in the + // shader"); + static_assert(sizeof(FSUniformData) < 16384, + "FSUniformData structure must be less than 16kb as per the OpenGL spec"); + +private: + struct SamplerInfo {}; + + /// Syncs the clip enabled status to match the guest state + void SyncClipEnabled(); + + /// Syncs the clip coefficients to match the guest state + void SyncClipCoef(); + + /// Sets the OpenGL shader in accordance with the current guest state + void SetShader(); + + /// Syncs the cull mode to match the guest state + void SyncCullMode(); + + /// Syncs the depth scale to match the guest state + void 
SyncDepthScale(); + + /// Syncs the depth offset to match the guest state + void SyncDepthOffset(); + + /// Syncs the blend enabled status to match the guest state + void SyncBlendEnabled(); + + /// Syncs the blend functions to match the guest state + void SyncBlendFuncs(); + + /// Syncs the blend color to match the guest state + void SyncBlendColor(); + + bool has_ARB_buffer_storage; + bool has_ARB_direct_state_access; + bool has_ARB_separate_shader_objects; + bool has_ARB_vertex_attrib_binding; + + OpenGLState state; + + RasterizerCacheOpenGL res_cache; + + struct { + UniformData data; + bool dirty; + } uniform_block_data = {}; + + OGLPipeline pipeline; + OGLVertexArray sw_vao; + OGLVertexArray hw_vao; + std::array<bool, 16> hw_vao_enabled_attributes; + + std::array<SamplerInfo, 3> texture_samplers; + static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; + std::unique_ptr<OGLStreamBuffer> vertex_buffer; + OGLBuffer uniform_buffer; + OGLFramebuffer framebuffer; + + static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; + std::unique_ptr<OGLStreamBuffer> stream_buffer; + + GLint vs_input_index_min; + GLint vs_input_index_max; + GLsizeiptr vs_input_size; + + void AnalyzeVertexArray(bool is_indexed); + void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); + + OGLBuffer vs_uniform_buffer; + std::unordered_map<GLShader::MaxwellVSConfig, VertexShader*> vs_shader_map; + std::unordered_map<std::string, VertexShader> vs_shader_cache; + OGLShader vs_default_shader; + + void SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset); + + OGLBuffer fs_uniform_buffer; + std::unordered_map<GLShader::MaxwellFSConfig, FragmentShader*> fs_shader_map; + std::unordered_map<std::string, FragmentShader> fs_shader_cache; + OGLShader fs_default_shader; + + void SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset); + + enum class AccelDraw { Disabled, Arrays, Indexed }; + AccelDraw accelerate_draw; +}; diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp new file mode 100644 index 000000000..884637ca5 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -0,0 +1,1361 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <atomic> +#include <cstring> +#include <iterator> +#include <memory> +#include <unordered_set> +#include <utility> +#include <vector> +#include <boost/optional.hpp> +#include <boost/range/iterator_range.hpp> +#include <glad/glad.h> +#include "common/alignment.h" +#include "common/bit_field.h" +#include "common/color.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "common/microprofile.h" +#include "common/scope_exit.h" +#include "common/vector_math.h" +#include "core/frontend/emu_window.h" +#include "core/memory.h" +#include "core/settings.h" +#include "video_core/renderer_opengl/gl_rasterizer_cache.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/utils.h" +#include "video_core/video_core.h" + +using SurfaceType = SurfaceParams::SurfaceType; +using PixelFormat = SurfaceParams::PixelFormat; + +struct FormatTuple { + GLint internal_format; + GLenum format; + GLenum type; +}; + +static constexpr std::array<FormatTuple, 5> fb_format_tuples = {{ + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 + {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 +}}; + +static constexpr std::array<FormatTuple, 4> depth_format_tuples = {{ + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 + {}, + {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, 
GL_UNSIGNED_INT_24_8}, // D24S8 +}}; + +static constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; + +static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { + const SurfaceType type = SurfaceParams::GetFormatType(pixel_format); + if (type == SurfaceType::Color) { + ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size()); + return fb_format_tuples[static_cast<unsigned int>(pixel_format)]; + } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { + size_t tuple_idx = static_cast<size_t>(pixel_format) - 14; + ASSERT(tuple_idx < depth_format_tuples.size()); + return depth_format_tuples[tuple_idx]; + } + return tex_tuple; +} + +template <typename Map, typename Interval> +constexpr auto RangeFromInterval(Map& map, const Interval& interval) { + return boost::make_iterator_range(map.equal_range(interval)); +} + +static u16 GetResolutionScaleFactor() { + return static_cast<u16>(!Settings::values.resolution_factor + ? VideoCore::g_emu_window->GetFramebufferLayout().GetScalingRatio() + : Settings::values.resolution_factor); +} + +template <bool morton_to_gl, PixelFormat format> +static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + for (u32 y = 0; y < 8; ++y) { + for (u32 x = 0; x < 8; ++x) { + u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; + u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel; + if (morton_to_gl) { + if (format == PixelFormat::D24S8) { + gl_ptr[0] = tile_ptr[3]; + std::memcpy(gl_ptr + 1, tile_ptr, 3); + } else { + std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); + } + } else { + if (format == PixelFormat::D24S8) { + std::memcpy(tile_ptr, gl_ptr + 1, 3); + tile_ptr[3] = gl_ptr[0]; + } else { + std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); + } + } + } + } +} + 
+template <bool morton_to_gl, PixelFormat format> +static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { + constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; + constexpr u32 tile_size = bytes_per_pixel * 64; + + constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); + static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); + gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; + + const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); + const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); + const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); + + ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); + + const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; + u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8); + u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8); + + gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; + + auto glbuf_next_tile = [&] { + x = (x + 8) % stride; + gl_buffer += 8 * gl_bytes_per_pixel; + if (!x) { + y += 8; + gl_buffer -= stride * 9 * gl_bytes_per_pixel; + } + }; + + u8* tile_buffer = Memory::GetPhysicalPointer(start); + + if (start < aligned_start && !morton_to_gl) { + std::array<u8, tile_size> tmp_buf; + MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], + std::min(aligned_start, end) - start); + + tile_buffer += aligned_start - start; + glbuf_next_tile(); + } + + const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; + while (tile_buffer < buffer_end) { + MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); + tile_buffer += tile_size; + glbuf_next_tile(); + } + + if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { + std::array<u8, tile_size> tmp_buf; + 
MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); + } +} + +static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = { + MortonCopy<true, PixelFormat::RGBA8>, // 0 + MortonCopy<true, PixelFormat::RGB8>, // 1 + MortonCopy<true, PixelFormat::RGB5A1>, // 2 + MortonCopy<true, PixelFormat::RGB565>, // 3 + MortonCopy<true, PixelFormat::RGBA4>, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy<true, PixelFormat::D16>, // 14 + nullptr, // 15 + MortonCopy<true, PixelFormat::D24>, // 16 + MortonCopy<true, PixelFormat::D24S8> // 17 +}; + +static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = { + MortonCopy<false, PixelFormat::RGBA8>, // 0 + MortonCopy<false, PixelFormat::RGB8>, // 1 + MortonCopy<false, PixelFormat::RGB5A1>, // 2 + MortonCopy<false, PixelFormat::RGB565>, // 3 + MortonCopy<false, PixelFormat::RGBA4>, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy<false, PixelFormat::D16>, // 14 + nullptr, // 15 + MortonCopy<false, PixelFormat::D24>, // 16 + MortonCopy<false, PixelFormat::D24S8> // 17 +}; + +// Allocate an uninitialized texture of appropriate size and format for the surface +static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width, + u32 height) { + OpenGLState cur_state = OpenGLState::GetCurState(); + + // Keep track of previous texture bindings + GLuint old_tex = cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = texture; + cur_state.Apply(); + glActiveTexture(GL_TEXTURE0); + + glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0, + format_tuple.format, format_tuple.type, nullptr); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + // Restore previous texture bindings + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); +} + +static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex, + const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type, + GLuint read_fb_handle, GLuint draw_fb_handle) { + OpenGLState state = OpenGLState::GetCurState(); + + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + // Make sure textures aren't bound to texture units, since going to bind them to framebuffer + // components + state.ResetTexture(src_tex); + state.ResetTexture(dst_tex); + + state.draw.read_framebuffer = read_fb_handle; + state.draw.draw_framebuffer = draw_fb_handle; + state.Apply(); + + u32 buffers = 0; + + if (type == SurfaceType::Color || type == SurfaceType::Texture) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex, + 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, + 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + buffers = GL_COLOR_BUFFER_BIT; + } else if (type == SurfaceType::Depth) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0); + 
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + buffers = GL_DEPTH_BUFFER_BIT; + } else if (type == SurfaceType::DepthStencil) { + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + src_tex, 0); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + dst_tex, 0); + + buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + } + + glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, + dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, + buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); + + return true; +} + +static bool FillSurface(const Surface& surface, const u8* fill_data, + const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { + UNIMPLEMENTED(); + return true; +} + +SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { + SurfaceParams params = *this; + const u32 tiled_size = is_tiled ? 8 : 1; + const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size); + PAddr aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes); + PAddr aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes); + + if (aligned_end - aligned_start > stride_tiled_bytes) { + params.addr = aligned_start; + params.height = static_cast<u32>((aligned_end - aligned_start) / BytesInPixels(stride)); + } else { + // 1 row + ASSERT(aligned_end - aligned_start == stride_tiled_bytes); + const u64 tiled_alignment = BytesInPixels(is_tiled ? 
8 * 8 : 1); + aligned_start = + addr + Common::AlignDown(boost::icl::first(interval) - addr, tiled_alignment); + aligned_end = + addr + Common::AlignUp(boost::icl::last_next(interval) - addr, tiled_alignment); + params.addr = aligned_start; + params.width = static_cast<u32>(PixelsInBytes(aligned_end - aligned_start) / tiled_size); + params.stride = params.width; + params.height = tiled_size; + } + params.UpdateParams(); + + return params; +} + +SurfaceInterval SurfaceParams::GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const { + if (unscaled_rect.GetHeight() == 0 || unscaled_rect.GetWidth() == 0) { + return {}; + } + + if (is_tiled) { + unscaled_rect.left = Common::AlignDown(unscaled_rect.left, 8) * 8; + unscaled_rect.bottom = Common::AlignDown(unscaled_rect.bottom, 8) / 8; + unscaled_rect.right = Common::AlignUp(unscaled_rect.right, 8) * 8; + unscaled_rect.top = Common::AlignUp(unscaled_rect.top, 8) / 8; + } + + const u32 stride_tiled = !is_tiled ? stride : stride * 8; + + const u32 pixel_offset = + stride_tiled * (!is_tiled ? 
unscaled_rect.bottom : (height / 8) - unscaled_rect.top) + + unscaled_rect.left; + + const u32 pixels = (unscaled_rect.GetHeight() - 1) * stride_tiled + unscaled_rect.GetWidth(); + + return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; +} + +MathUtil::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { + const u32 begin_pixel_index = static_cast<u32>(PixelsInBytes(sub_surface.addr - addr)); + + if (is_tiled) { + const int x0 = (begin_pixel_index % (stride * 8)) / 8; + const int y0 = (begin_pixel_index / (stride * 8)) * 8; + // Top to bottom + return MathUtil::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width, + height - (y0 + sub_surface.height)); + } + + const int x0 = begin_pixel_index % stride; + const int y0 = begin_pixel_index / stride; + // Bottom to top + return MathUtil::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); +} + +MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& sub_surface) const { + auto rect = GetSubRect(sub_surface); + rect.left = rect.left * res_scale; + rect.right = rect.right * res_scale; + rect.top = rect.top * res_scale; + rect.bottom = rect.bottom * res_scale; + return rect; +} + +bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { + return std::tie(other_surface.addr, other_surface.width, other_surface.height, + other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == + std::tie(addr, width, height, stride, pixel_format, is_tiled) && + pixel_format != PixelFormat::Invalid; +} + +bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { + return sub_surface.addr >= addr && sub_surface.end <= end && + sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid && + sub_surface.is_tiled == is_tiled && + (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 
64 : 1) == 0 && + (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) && + GetSubRect(sub_surface).left + sub_surface.width <= stride; +} + +bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { + return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && + addr <= expanded_surface.end && expanded_surface.addr <= end && + is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride && + (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) % + BytesInPixels(stride * (is_tiled ? 8 : 1)) == + 0; +} + +bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const { + if (pixel_format == PixelFormat::Invalid || addr > texcopy_params.addr || + end < texcopy_params.end) { + return false; + } + if (texcopy_params.width != texcopy_params.stride) { + const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1))); + return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 && + texcopy_params.width % BytesInPixels(is_tiled ? 
64 : 1) == 0 &&
+               (texcopy_params.height == 1 || texcopy_params.stride == tile_stride) &&
+               ((texcopy_params.addr - addr) % tile_stride) + texcopy_params.width <= tile_stride;
+    }
+    return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
+}
+
+/// Returns true when this surface is a valid Fill surface whose pattern can be used to fill
+/// fill_interval of dest_surface. The interval must be valid in this surface, lie within
+/// [addr, end] and map to a rectangle of dest_surface. If the fill pattern size differs from
+/// the destination pixel size, the pattern must additionally repeat on destination pixel
+/// boundaries (and, for 4 bpp destinations, both nibbles of the first byte must be equal).
+bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
+                            SurfaceInterval fill_interval) const {
+    if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
+        boost::icl::first(fill_interval) >= addr &&
+        boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
+        dest_surface.FromInterval(fill_interval).GetInterval() ==
+            fill_interval) { // make sure interval is a rectangle in dest surface
+        if (fill_size * 8 != dest_surface.GetFormatBpp()) {
+            // Check if bits repeat for our fill_size
+            const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u);
+            std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
+
+            // Lay the fill pattern out dest_bytes_per_pixel times back to back ...
+            for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
+                std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size);
+
+            // ... then require every destination-pixel-sized chunk to equal the first one
+            for (u32 i = 0; i < fill_size; ++i)
+                if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0],
+                                dest_bytes_per_pixel) != 0)
+                    return false;
+
+            if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4))
+                return false;
+        }
+        return true;
+    }
+    return false;
+}
+
+/// Returns true when copy_interval of dest_surface can be produced from this surface, either as
+/// a sub-rectangle of it (CanSubRect) or by filling (CanFill). Asserts that copy_interval maps
+/// exactly to a rectangle of dest_surface.
+bool CachedSurface::CanCopy(const SurfaceParams& dest_surface,
+                            SurfaceInterval copy_interval) const {
+    SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
+    ASSERT(subrect_params.GetInterval() == copy_interval);
+    if (CanSubRect(subrect_params))
+        return true;
+
+    if (CanFill(dest_surface, copy_interval))
+        return true;
+
+    return false;
+}
+
+/// Returns the longest interval inside this surface's range that is valid in src_surface and
+/// can be expressed as a rectangle (or a single partial row) of this surface. Candidate regions
+/// are the overlap of both surfaces minus src_surface's invalid regions. Returns a
+/// default-constructed interval when no region qualifies.
+SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const {
+    SurfaceInterval result{};
+    const auto valid_regions =
+        SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions;
+    for (auto& valid_interval : valid_regions) {
+        // Shrink the region to whole tiles (8 * 8 pixels) when tiled, whole pixels otherwise
+        const SurfaceInterval aligned_interval{
+            addr + Common::AlignUp(boost::icl::first(valid_interval) - addr,
+                                   BytesInPixels(is_tiled ? 8 * 8 : 1)),
+            addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr,
+                                     BytesInPixels(is_tiled ? 8 * 8 : 1))};
+
+        // Skip regions smaller than one alignment unit or that vanish after alignment
+        if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) ||
+            boost::icl::length(aligned_interval) == 0) {
+            continue;
+        }
+
+        // Get the rectangle within aligned_interval
+        const u32 stride_bytes = static_cast<u32>(BytesInPixels(stride)) * (is_tiled ? 8 : 1);
+        SurfaceInterval rect_interval{
+            addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes),
+            addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes),
+        };
+        if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) {
+            // 1 row
+            rect_interval = aligned_interval;
+        } else if (boost::icl::length(rect_interval) == 0) {
+            // 2 rows that do not make a rectangle, return the larger one
+            const SurfaceInterval row1{boost::icl::first(aligned_interval),
+                                       boost::icl::first(rect_interval)};
+            const SurfaceInterval row2{boost::icl::first(rect_interval),
+                                       boost::icl::last_next(aligned_interval)};
+            rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? row1 : row2;
+        }
+
+        // Keep the longest candidate seen so far
+        if (boost::icl::length(rect_interval) > boost::icl::length(result)) {
+            result = rect_interval;
+        }
+    }
+    return result;
+}
+
+/// Copies copy_interval from src_surface into dst_surface's texture. copy_interval must map
+/// exactly to a rectangle of dst_surface and the two surfaces must differ (both asserted).
+/// A Fill source is applied with FillSurface(); any other source must satisfy CanSubRect() and
+/// is blitted with BlitTextures(). Reaching the end of the function is UNREACHABLE().
+void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface,
+                                        SurfaceInterval copy_interval) {
+    SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval);
+    ASSERT(subrect_params.GetInterval() == copy_interval);
+
+    ASSERT(src_surface != dst_surface);
+
+    // This is only called when CanCopy is true, no need to run checks here
+    if (src_surface->type == SurfaceType::Fill) {
+        // FillSurface needs a 4 bytes buffer
+        // Phase of the fill pattern at the first byte of copy_interval
+        const u64 fill_offset =
+            (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size;
+        std::array<u8, 4> fill_buffer;
+
+        // Unroll the (possibly shorter) pattern into 4 bytes, starting at that phase
+        u64 fill_buff_pos = fill_offset;
+        for (int i : {0, 1, 2, 3})
+            fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size];
+
+        FillSurface(dst_surface, &fill_buffer[0], dst_surface->GetScaledSubRect(subrect_params),
+                    draw_framebuffer.handle);
+        return;
+    }
+    if (src_surface->CanSubRect(subrect_params)) {
+        BlitTextures(src_surface->texture.handle, src_surface->GetScaledSubRect(subrect_params),
+                     dst_surface->texture.handle, dst_surface->GetScaledSubRect(subrect_params),
+                     src_surface->type, read_framebuffer.handle, draw_framebuffer.handle);
+        return;
+    }
+    UNREACHABLE();
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
+/// Copies [load_start, load_end) of this surface from emulated memory into gl_buffer, which is
+/// allocated on first use. Linear surfaces are copied with memcpy; tiled surfaces go through the
+/// morton_to_gl_fns entry for pixel_format (tiled Texture surfaces are UNIMPLEMENTED). Must not
+/// be called on Fill surfaces. Returns without doing anything when
+/// Memory::GetPhysicalPointer(addr) is null.
+void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
+    ASSERT(type != SurfaceType::Fill);
+
+    const u8* const texture_src_data = Memory::GetPhysicalPointer(addr);
+    if (texture_src_data == nullptr)
+        return;
+
+    if (gl_buffer == nullptr) {
+        gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
+        gl_buffer.reset(new u8[gl_buffer_size]);
+    }
+
+    // TODO: Should probably be done in ::Memory:: and check for other regions too
+    // NOTE(review): PAddr values are compared against Memory::VRAM_VADDR* constants here, whose
+    // names suggest virtual addresses - confirm this is intended.
+    if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
+
load_end = Memory::VRAM_VADDR_END;
+
+    if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
+        load_start = Memory::VRAM_VADDR;
+
+    MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
+
+    ASSERT(load_start >= addr && load_end <= end);
+    const u32 start_offset = load_start - addr;
+
+    if (!is_tiled) {
+        ASSERT(type == SurfaceType::Color);
+        // Linear data is copied as-is; the same byte offset is used on both sides
+        std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+                    load_end - load_start);
+    } else {
+        if (type == SurfaceType::Texture) {
+            UNIMPLEMENTED();
+        } else {
+            morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
+                                                                load_start, load_end);
+        }
+    }
+}
+
+MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
+/// Writes [flush_start, flush_end) of this surface back to emulated memory.
+/// Fill surfaces repeat fill_data over the range; linear surfaces are copied from gl_buffer
+/// with memcpy; tiled surfaces go through the gl_to_morton_fns entry for pixel_format.
+/// Returns without doing anything when Memory::GetPhysicalPointer(addr) is null.
+void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
+    u8* const dst_buffer = Memory::GetPhysicalPointer(addr);
+    if (dst_buffer == nullptr)
+        return;
+
+    ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
+
+    // TODO: Should probably be done in ::Memory:: and check for other regions too
+    // Same clamping as in LoadGLBuffer()
+    if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
+        flush_end = Memory::VRAM_VADDR_END;
+
+    if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
+        flush_start = Memory::VRAM_VADDR;
+
+    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
+
+    ASSERT(flush_start >= addr && flush_end <= end);
+    const u64 start_offset = flush_start - addr;
+    const u64 end_offset = flush_end - addr;
+
+    if (type == SurfaceType::Fill) {
+        // Start writing at the previous multiple of fill_size so the pattern stays in phase,
+        // saving the bytes in front of start_offset so they can be restored afterwards
+        const u64 coarse_start_offset = start_offset - (start_offset % fill_size);
+        const u64 backup_bytes = start_offset % fill_size;
+        std::array<u8, 4> backup_data;
+        if (backup_bytes)
+            std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
+
+        for (u64 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
+            // The last repetition is truncated at end_offset
+            std::memcpy(&dst_buffer[offset], &fill_data[0],
+                        std::min(fill_size, end_offset - offset));
+        }
+
+        if (backup_bytes)
+            std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
+    } else if (!is_tiled) {
+        ASSERT(type == SurfaceType::Color);
+        std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
+    } else {
+        gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
+                                                            flush_start, flush_end);
+    }
+}
+
+MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
+/// Uploads the `rect` portion (unscaled pixels) of gl_buffer into this surface's GL texture.
+/// Does nothing for Fill surfaces. When res_scale != 1 the data is first uploaded to a temporary
+/// unscaled texture, which is then blitted into the scaled rectangle of the surface texture
+/// using read_fb_handle / draw_fb_handle.
+void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
+                                    GLuint draw_fb_handle) {
+    if (type == SurfaceType::Fill)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_TextureUL);
+
+    ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
+
+    // Load data from memory to the surface
+    GLint x0 = static_cast<GLint>(rect.left);
+    GLint y0 = static_cast<GLint>(rect.bottom);
+    // Byte offset of the rectangle's first pixel inside gl_buffer (rows are `stride` pixels)
+    size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);
+
+    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+    GLuint target_tex = texture.handle;
+
+    // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
+    // surface
+    OGLTexture unscaled_tex;
+    if (res_scale != 1) {
+        // The temporary texture only holds `rect`, so the upload starts at its origin
+        x0 = 0;
+        y0 = 0;
+
+        unscaled_tex.Create();
+        AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
+        target_tex = unscaled_tex.handle;
+    }
+
+    OpenGLState cur_state = OpenGLState::GetCurState();
+
+    // Temporarily bind the upload target to texture unit 0; restored after the upload
+    GLuint old_tex = cur_state.texture_units[0].texture_2d;
+    cur_state.texture_units[0].texture_2d = target_tex;
+    cur_state.Apply();
+
+    // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
+    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
+
+    glActiveTexture(GL_TEXTURE0);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+                    static_cast<GLsizei>(rect.GetHeight()), tuple.format,
tuple.type,
+                    &gl_buffer[buffer_offset]);
+
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+
+    cur_state.texture_units[0].texture_2d = old_tex;
+    cur_state.Apply();
+
+    if (res_scale != 1) {
+        auto scaled_rect = rect;
+        scaled_rect.left *= res_scale;
+        scaled_rect.top *= res_scale;
+        scaled_rect.right *= res_scale;
+        scaled_rect.bottom *= res_scale;
+
+        // Stretch the whole temporary 1x texture onto the scaled rectangle of the surface
+        BlitTextures(unscaled_tex.handle, {0, rect.GetHeight(), rect.GetWidth(), 0}, texture.handle,
+                     scaled_rect, type, read_fb_handle, draw_fb_handle);
+    }
+}
+
+MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
+/// Reads the `rect` portion (unscaled pixels) of this surface's GL texture back into gl_buffer,
+/// allocating gl_buffer on first use. Does nothing for Fill surfaces. When res_scale != 1 the
+/// scaled rectangle is first blitted into a temporary unscaled texture, which is read with
+/// glGetTexImage; otherwise the texture is attached to read_fb_handle and read with
+/// glReadPixels. The OpenGL state captured on entry is re-applied on exit (SCOPE_EXIT).
+void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
+                                      GLuint draw_fb_handle) {
+    if (type == SurfaceType::Fill)
+        return;
+
+    MICROPROFILE_SCOPE(OpenGL_TextureDL);
+
+    if (gl_buffer == nullptr) {
+        gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
+        gl_buffer.reset(new u8[gl_buffer_size]);
+    }
+
+    OpenGLState state = OpenGLState::GetCurState();
+    OpenGLState prev_state = state;
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+
+    // Ensure no bad interactions with GL_PACK_ALIGNMENT
+    ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
+    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
+    // Byte offset of the rectangle's first pixel inside gl_buffer (rows are `stride` pixels)
+    size_t buffer_offset = (rect.bottom * stride + rect.left) * GetGLBytesPerPixel(pixel_format);
+
+    // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
+    if (res_scale != 1) {
+        auto scaled_rect = rect;
+        scaled_rect.left *= res_scale;
+        scaled_rect.top *= res_scale;
+        scaled_rect.right *= res_scale;
+        scaled_rect.bottom *= res_scale;
+
+        OGLTexture unscaled_tex;
+        unscaled_tex.Create();
+
+        MathUtil::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
+        AllocateSurfaceTexture(unscaled_tex.handle, tuple, rect.GetWidth(), rect.GetHeight());
+        BlitTextures(texture.handle, scaled_rect, unscaled_tex.handle, unscaled_tex_rect, type,
+                     read_fb_handle, draw_fb_handle);
+
+        state.texture_units[0].texture_2d = unscaled_tex.handle;
+        state.Apply();
+
+        glActiveTexture(GL_TEXTURE0);
+        glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+    } else {
+        state.ResetTexture(texture.handle);
+        state.draw.read_framebuffer = read_fb_handle;
+        state.Apply();
+
+        // Attach the texture to the attachment point matching its surface type and clear the
+        // other attachment(s) of the read framebuffer
+        if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   0, 0);
+        } else if (type == SurfaceType::Depth) {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+        } else {
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+            glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   texture.handle, 0);
+        }
+        glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
+                     static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
+                     tuple.format, tuple.type, &gl_buffer[buffer_offset]);
+    }
+
+    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+}
+
+/// Bit flags selecting which kinds of match FindMatch() considers; combine with operator|.
+enum MatchFlags {
+    Invalid = 1,      // Flag that can be applied to other match types, invalid matches require
+                      // validation before they can be used
+    Exact = 1 << 1,   // Surfaces perfectly match
+    SubRect = 1 << 2, // Surface encompasses params
+    Copy = 1 << 3,    // Surface we can copy from
+    Expand = 1 << 4,  // Surface that can expand params
+    TexCopy = 1 << 5  // Surface that will match a display transfer "texture copy" parameters
+};
+
+/// Bitwise OR of two MatchFlags values, usable in constant expressions (e.g. as a template
+/// argument of FindMatch).
+constexpr MatchFlags operator|(MatchFlags lhs, MatchFlags rhs) {
+    return
static_cast<MatchFlags>(static_cast<int>(lhs) | static_cast<int>(rhs));
+}
+
+/// Get the best surface match for the given flags, or nullptr when nothing matches.
+/// Every surface in surface_cache that overlaps params' interval is tested against each match
+/// kind enabled in find_flags. Among the candidates the winner is chosen by, in order: higher
+/// res_scale, valid over invalid, then the longer matched interval. Surfaces whose checked
+/// region is invalid are only considered when find_flags contains MatchFlags::Invalid.
+/// validate_interval is the region whose validity is checked (defaults to params' interval);
+/// it must be provided when find_flags contains MatchFlags::Copy.
+template <MatchFlags find_flags>
+Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params,
+                  ScaleMatch match_scale_type,
+                  boost::optional<SurfaceInterval> validate_interval = boost::none) {
+    Surface match_surface = nullptr;
+    bool match_valid = false;
+    u32 match_scale = 0;
+    SurfaceInterval match_interval{};
+
+    for (auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
+        for (auto& surface : pair.second) {
+            // Exact requires equal scales; any other mode accepts an equal or higher scale
+            bool res_scale_matched = match_scale_type == ScaleMatch::Exact
+                                         ? (params.res_scale == surface->res_scale)
+                                         : (params.res_scale <= surface->res_scale);
+            // validity will be checked in GetCopyableInterval
+            bool is_valid =
+                find_flags & MatchFlags::Copy
+                    ? true
+                    : surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
+
+            if (!(find_flags & MatchFlags::Invalid) && !is_valid)
+                continue;
+
+            // Runs match_fn only if check_type is enabled in find_flags and records the surface
+            // as the current best match when it beats the previous one
+            auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
+                if (!(find_flags & check_type))
+                    return;
+
+                bool matched;
+                SurfaceInterval surface_interval;
+                std::tie(matched, surface_interval) = match_fn();
+                if (!matched)
+                    return;
+
+                // Scale mismatches are tolerated for Fill surfaces and under ScaleMatch::Ignore
+                if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
+                    surface->type != SurfaceType::Fill)
+                    return;
+
+                // Found a match, update only if this is better than the previous one
+                auto UpdateMatch = [&] {
+                    match_surface = surface;
+                    match_valid = is_valid;
+                    match_scale = surface->res_scale;
+                    match_interval = surface_interval;
+                };
+
+                if (surface->res_scale > match_scale) {
+                    UpdateMatch();
+                    return;
+                } else if (surface->res_scale < match_scale) {
+                    return;
+                }
+
+                if (is_valid && !match_valid) {
+                    UpdateMatch();
+                    return;
+                } else if (is_valid != match_valid) {
+                    return;
+                }
+
+                if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
+                    UpdateMatch();
+                }
+            };
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
+                return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
+                return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
+                // NOTE(review): validate_interval is dereferenced without a check; the Copy
+                // callers visible in this file (ValidateSurface) always pass an interval.
+                auto copy_interval =
+                    params.FromInterval(*validate_interval).GetCopyableInterval(surface);
+                bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
+                               surface->CanCopy(params, copy_interval);
+                return std::make_pair(matched, copy_interval);
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
+                return std::make_pair(surface->CanExpand(params), surface->GetInterval());
+            });
+            IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
+                return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
+            });
+        }
+    }
+    return match_surface;
+}
+
+/// Creates the GL objects owned by the cache (read/draw framebuffers, an attribute-less VAO and
+/// the buffer used by ConvertD24S8toABGR), builds the D24S8 -> ABGR conversion shader, points
+/// its "tbo" sampler at texture unit 0 and caches the locations of its "tbo_size" and
+/// "viewport" uniforms. The previously bound shader program is restored afterwards.
+RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
+    read_framebuffer.Create();
+    draw_framebuffer.Create();
+
+    attributeless_vao.Create();
+
+    d24s8_abgr_buffer.Create();
+    d24s8_abgr_buffer_size = 0;
+
+    const char* vs_source = R"(
+#version 330 core
+const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
+void main() {
+    gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
+}
+)";
+    const char* fs_source = R"(
+#version 330 core
+
+uniform samplerBuffer tbo;
+uniform vec2 tbo_size;
+uniform vec4 viewport;
+
+out vec4 color;
+
+void main() {
+    vec2 tbo_coord = (gl_FragCoord.xy - viewport.xy) * tbo_size / viewport.zw;
+    int tbo_offset = int(tbo_coord.y) * int(tbo_size.x) + int(tbo_coord.x);
+    color = texelFetch(tbo, tbo_offset).rabg;
+}
+)";
+    d24s8_abgr_shader.Create(vs_source, nullptr, fs_source);
+
+    OpenGLState state = OpenGLState::GetCurState();
+    GLuint old_program =
state.draw.shader_program;
+    state.draw.shader_program = d24s8_abgr_shader.handle;
+    state.Apply();
+
+    // The conversion shader samples its texture buffer from texture unit 0
+    GLint tbo_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo");
+    ASSERT(tbo_u_id != -1);
+    glUniform1i(tbo_u_id, 0);
+
+    state.draw.shader_program = old_program;
+    state.Apply();
+
+    d24s8_abgr_tbo_size_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "tbo_size");
+    ASSERT(d24s8_abgr_tbo_size_u_id != -1);
+    d24s8_abgr_viewport_u_id = glGetUniformLocation(d24s8_abgr_shader.handle, "viewport");
+    ASSERT(d24s8_abgr_viewport_u_id != -1);
+}
+
+/// Flushes every dirty region, then unregisters surfaces until surface_cache is empty.
+RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
+    FlushAll();
+    while (!surface_cache.empty())
+        UnregisterSurface(*surface_cache.begin()->second.begin());
+}
+
+/// Blits src_rect of src_surface onto dst_rect of dst_surface.
+/// Returns false without blitting when SurfaceParams::CheckFormatsBlittable() rejects the two
+/// pixel formats; otherwise returns the result of BlitTextures().
+bool RasterizerCacheOpenGL::BlitSurfaces(const Surface& src_surface,
+                                         const MathUtil::Rectangle<u32>& src_rect,
+                                         const Surface& dst_surface,
+                                         const MathUtil::Rectangle<u32>& dst_rect) {
+    if (!SurfaceParams::CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format))
+        return false;
+
+    return BlitTextures(src_surface->texture.handle, src_rect, dst_surface->texture.handle,
+                        dst_rect, src_surface->type, read_framebuffer.handle,
+                        draw_framebuffer.handle);
+}
+
+/// Reinterprets src_rect of the depth-stencil texture src_tex as colour data and draws it into
+/// dst_rect of dst_tex. The depth-stencil pixels are read into a pixel pack buffer, which is
+/// then exposed to the conversion shader as an RGBA8 texture buffer and drawn with a
+/// 4-vertex triangle strip. The OpenGL state captured on entry is re-applied on exit.
+void RasterizerCacheOpenGL::ConvertD24S8toABGR(GLuint src_tex,
+                                               const MathUtil::Rectangle<u32>& src_rect,
+                                               GLuint dst_tex,
+                                               const MathUtil::Rectangle<u32>& dst_rect) {
+    OpenGLState prev_state = OpenGLState::GetCurState();
+    SCOPE_EXIT({ prev_state.Apply(); });
+
+    OpenGLState state;
+    state.draw.read_framebuffer = read_framebuffer.handle;
+    state.draw.draw_framebuffer = draw_framebuffer.handle;
+    state.Apply();
+
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer.handle);
+
+    // 4 bytes per pixel; when the buffer is too small it is reallocated at twice the needed size
+    GLsizeiptr target_pbo_size = src_rect.GetWidth() * src_rect.GetHeight() * 4;
+    if (target_pbo_size > d24s8_abgr_buffer_size) {
+        d24s8_abgr_buffer_size = target_pbo_size * 2;
+        glBufferData(GL_PIXEL_PACK_BUFFER, d24s8_abgr_buffer_size, nullptr, GL_STREAM_COPY);
+    }
+
+
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+    glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, src_tex,
+                           0);
+    // With a pixel pack buffer bound, the final argument is an offset into that buffer
+    glReadPixels(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.bottom),
+                 static_cast<GLsizei>(src_rect.GetWidth()),
+                 static_cast<GLsizei>(src_rect.GetHeight()), GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
+                 0);
+
+    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+
+    // PBO now contains src_tex in RABG format
+    state.draw.shader_program = d24s8_abgr_shader.handle;
+    state.draw.vertex_array = attributeless_vao.handle;
+    state.viewport.x = static_cast<GLint>(dst_rect.left);
+    state.viewport.y = static_cast<GLint>(dst_rect.bottom);
+    state.viewport.width = static_cast<GLsizei>(dst_rect.GetWidth());
+    state.viewport.height = static_cast<GLsizei>(dst_rect.GetHeight());
+    state.Apply();
+
+    // Expose the buffer contents to the shader as an RGBA8 texture buffer on unit 0
+    OGLTexture tbo;
+    tbo.Create();
+    glActiveTexture(GL_TEXTURE0);
+    glBindTexture(GL_TEXTURE_BUFFER, tbo.handle);
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA8, d24s8_abgr_buffer.handle);
+
+    glUniform2f(d24s8_abgr_tbo_size_u_id, static_cast<GLfloat>(src_rect.GetWidth()),
+                static_cast<GLfloat>(src_rect.GetHeight()));
+    glUniform4f(d24s8_abgr_viewport_u_id, static_cast<GLfloat>(state.viewport.x),
+                static_cast<GLfloat>(state.viewport.y), static_cast<GLfloat>(state.viewport.width),
+                static_cast<GLfloat>(state.viewport.height));
+
+    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex, 0);
+    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+    glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+
+    glBindTexture(GL_TEXTURE_BUFFER, 0);
+}
+
+/// Returns the cached surface that exactly matches params, creating and registering a new one
+/// when none exists. Returns nullptr when params.addr is 0 or the surface has no pixels.
+/// params must have width == stride (use GetSurfaceSubRect otherwise) and, when tiled,
+/// dimensions that are multiples of 8 (both asserted).
+/// When a surface is created and match_res_scale is not Exact, its res_scale is raised to that
+/// of an expandable surface with a higher scale (also checking D24S8 surfaces when params is
+/// RGBA8). If load_if_create is true, params' range of the returned surface is validated; note
+/// that this also happens when an existing surface was found.
+Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
+                                          bool load_if_create) {
+    if (params.addr == 0 || params.height * params.width == 0) {
+        return nullptr;
+    }
+    // Use GetSurfaceSubRect instead
+    ASSERT(params.width == params.stride);
+
+    ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0));
+
+    // Check for an exact match in existing surfaces
+    Surface surface =
+        FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
+
+    if (surface == nullptr) {
+        u16 target_res_scale = params.res_scale;
+        if (match_res_scale != ScaleMatch::Exact) {
+            // This surface may have a subrect of another surface with a higher res_scale, find it
+            // to adjust our params
+            SurfaceParams find_params = params;
+            Surface expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
+                surface_cache, find_params, match_res_scale);
+            if (expandable != nullptr && expandable->res_scale > target_res_scale) {
+                target_res_scale = expandable->res_scale;
+            }
+            // Keep res_scale when reinterpreting d24s8 -> rgba8
+            if (params.pixel_format == PixelFormat::RGBA8) {
+                find_params.pixel_format = PixelFormat::D24S8;
+                expandable = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(
+                    surface_cache, find_params, match_res_scale);
+                if (expandable != nullptr && expandable->res_scale > target_res_scale) {
+                    target_res_scale = expandable->res_scale;
+                }
+            }
+        }
+        SurfaceParams new_params = params;
+        new_params.res_scale = target_res_scale;
+        surface = CreateSurface(new_params);
+        RegisterSurface(surface);
+    }
+
+    if (load_if_create) {
+        ValidateSurface(surface, params.addr, params.size);
+    }
+
+    return surface;
+}
+
+/// Returns a surface that contains params together with the scaled rectangle params occupies
+/// inside it. Returns {nullptr, empty rectangle} when params.addr is 0 or params has no pixels.
+/// Lookup order: an existing surface that encompasses params; the same search ignoring
+/// res_scale (a new surface with the found surface's dimensions and params' res_scale is then
+/// created); an existing surface that can be expanded to include params (a larger surface is
+/// created, the old contents are duplicated into it and the old surface is queued in
+/// remove_surfaces); finally a brand-new surface obtained through GetSurface().
+/// If load_if_create is true, the params range of the returned surface is validated.
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
+                                                           ScaleMatch match_res_scale,
+                                                           bool load_if_create) {
+    if (params.addr == 0 || params.height * params.width == 0) {
+        return std::make_tuple(nullptr, MathUtil::Rectangle<u32>{});
+    }
+
+    // Attempt to find encompassing surface
+    Surface surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
+                                                                           match_res_scale);
+
+    // Check if FindMatch failed because of res scaling
+    // If that's the case create a new surface with
+    // the dimensions of the lower
res_scale surface
+    // to suggest it should not be used again
+    if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) {
+        surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>(surface_cache, params,
+                                                                       ScaleMatch::Ignore);
+        if (surface != nullptr) {
+            ASSERT(surface->res_scale < params.res_scale);
+            SurfaceParams new_params = *surface;
+            new_params.res_scale = params.res_scale;
+
+            surface = CreateSurface(new_params);
+            RegisterSurface(surface);
+        }
+    }
+
+    // Tiled surfaces are handled in whole 8x8 tiles, so round the dimensions up
+    SurfaceParams aligned_params = params;
+    if (params.is_tiled) {
+        aligned_params.height = Common::AlignUp(params.height, 8);
+        aligned_params.width = Common::AlignUp(params.width, 8);
+        aligned_params.stride = Common::AlignUp(params.stride, 8);
+        aligned_params.UpdateParams();
+    }
+
+    // Check for a surface we can expand before creating a new one
+    if (surface == nullptr) {
+        surface = FindMatch<MatchFlags::Expand | MatchFlags::Invalid>(surface_cache, aligned_params,
+                                                                      match_res_scale);
+        if (surface != nullptr) {
+            aligned_params.width = aligned_params.stride;
+            aligned_params.UpdateParams();
+
+            // The new surface spans the union of both address ranges; its height is derived
+            // from the resulting size, which must be a whole number of rows
+            SurfaceParams new_params = *surface;
+            new_params.addr = std::min(aligned_params.addr, surface->addr);
+            new_params.end = std::max(aligned_params.end, surface->end);
+            new_params.size = new_params.end - new_params.addr;
+            new_params.height = static_cast<u32>(
+                new_params.size / aligned_params.BytesInPixels(aligned_params.stride));
+            ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0);
+
+            Surface new_surface = CreateSurface(new_params);
+            DuplicateSurface(surface, new_surface);
+
+            // Delete the expanded surface, this can't be done safely yet
+            // because it may still be in use
+            remove_surfaces.emplace(surface);
+
+            surface = new_surface;
+            RegisterSurface(new_surface);
+        }
+    }
+
+    // No subrect found - create and return a new surface
+    if (surface == nullptr) {
+        SurfaceParams new_params = aligned_params;
+        // Can't have gaps in a surface
+        new_params.width = aligned_params.stride;
+        new_params.UpdateParams();
+        // GetSurface will create the new surface and possibly adjust res_scale if necessary
+        surface = GetSurface(new_params, match_res_scale, load_if_create);
+    } else if (load_if_create) {
+        ValidateSurface(surface, aligned_params.addr, aligned_params.size);
+    }
+
+    return std::make_tuple(surface, surface->GetScaledSubRect(params));
+}
+
+/// Not implemented yet: hits UNIMPLEMENTED() and returns an empty Surface. `config` is unused.
+Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) {
+    UNIMPLEMENTED();
+    return {};
+}
+
+/// Not implemented yet: hits UNIMPLEMENTED() and returns a value-initialized tuple. All
+/// parameters are unused.
+SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
+    bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) {
+    UNIMPLEMENTED();
+    return {};
+}
+
+/// Not implemented yet: hits UNIMPLEMENTED() and returns an empty Surface. `config` is unused.
+Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) {
+    UNIMPLEMENTED();
+    return {};
+}
+
+/// Finds a cached surface that can serve the texture copy described by params, validates the
+/// params range of that surface and returns it with the scaled rectangle the copy covers.
+/// Returns {nullptr, empty rectangle} when no surface matches. When params.width !=
+/// params.stride, width and stride are converted from bytes to pixels of the matched surface
+/// (divided by 8 for tiled surfaces, whose height is multiplied by 8 instead).
+SurfaceRect_Tuple RasterizerCacheOpenGL::GetTexCopySurface(const SurfaceParams& params) {
+    MathUtil::Rectangle<u32> rect{};
+
+    Surface match_surface = FindMatch<MatchFlags::TexCopy | MatchFlags::Invalid>(
+        surface_cache, params, ScaleMatch::Ignore);
+
+    if (match_surface != nullptr) {
+        ValidateSurface(match_surface, params.addr, params.size);
+
+        SurfaceParams match_subrect;
+        if (params.width != params.stride) {
+            const u32 tiled_size = match_surface->is_tiled ?
8 : 1;
+            match_subrect = params;
+            match_subrect.width =
+                static_cast<u32>(match_surface->PixelsInBytes(params.width) / tiled_size);
+            match_subrect.stride =
+                static_cast<u32>(match_surface->PixelsInBytes(params.stride) / tiled_size);
+            match_subrect.height *= tiled_size;
+        } else {
+            match_subrect = match_surface->FromInterval(params.GetInterval());
+            ASSERT(match_subrect.GetInterval() == params.GetInterval());
+        }
+
+        rect = match_surface->GetScaledSubRect(match_subrect);
+    }
+
+    return std::make_tuple(match_surface, rect);
+}
+
+/// Copies the contents and bookkeeping of src_surface into dest_surface, which must cover
+/// src_surface's whole address range (asserted). The texture is blitted into the matching
+/// sub-rectangle, dest_surface's invalid regions inside src's range are replaced by src's, and
+/// every dirty region owned by src_surface is reassigned to dest_surface.
+/// NOTE(review): the bool result of BlitSurfaces() is ignored here.
+void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
+                                             const Surface& dest_surface) {
+    ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end);
+
+    BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface,
+                 dest_surface->GetScaledSubRect(*src_surface));
+
+    dest_surface->invalid_regions -= src_surface->GetInterval();
+    dest_surface->invalid_regions += src_surface->invalid_regions;
+
+    // Collect first, then update: dirty_regions is not modified while it is being iterated
+    SurfaceRegions regions;
+    for (auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) {
+        if (pair.second == src_surface) {
+            regions += pair.first;
+        }
+    }
+    for (auto& interval : regions) {
+        dirty_regions.set({interval, dest_surface});
+    }
+}
+
+/// Makes [addr, addr + size) of `surface` valid. Does nothing when size is 0; Fill surfaces are
+/// only asserted to be valid already. Otherwise, for each invalid region overlapping the range,
+/// the data is obtained from the first source that works: a copy from another cached surface,
+/// a D24S8 -> RGBA8 reinterpretation (only when `surface` is RGBA8), or a load from memory
+/// after flushing that region. The handled interval is removed from surface->invalid_regions
+/// and the loop repeats until no invalid region overlaps the range.
+void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u64 size) {
+    if (size == 0)
+        return;
+
+    const SurfaceInterval validate_interval(addr, addr + size);
+
+    if (surface->type == SurfaceType::Fill) {
+        // Sanity check, fill surfaces will always be valid when used
+        ASSERT(surface->IsRegionValid(validate_interval));
+        return;
+    }
+
+    while (true) {
+        const auto it = surface->invalid_regions.find(validate_interval);
+        if (it == surface->invalid_regions.end())
+            break;
+
+        const auto interval = *it & validate_interval;
+        // Look for a valid surface to copy from
+        SurfaceParams params = surface->FromInterval(interval);
+
+        Surface copy_surface =
+            FindMatch<MatchFlags::Copy>(surface_cache, params,
ScaleMatch::Ignore, interval);
+        if (copy_surface != nullptr) {
+            SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface);
+            CopySurface(copy_surface, surface, copy_interval);
+            surface->invalid_regions.erase(copy_interval);
+            continue;
+        }
+
+        // D24S8 to RGBA8
+        if (surface->pixel_format == PixelFormat::RGBA8) {
+            params.pixel_format = PixelFormat::D24S8;
+            Surface reinterpret_surface =
+                FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
+            if (reinterpret_surface != nullptr) {
+                ASSERT(reinterpret_surface->pixel_format == PixelFormat::D24S8);
+
+                SurfaceInterval convert_interval = params.GetCopyableInterval(reinterpret_surface);
+                SurfaceParams convert_params = surface->FromInterval(convert_interval);
+                auto src_rect = reinterpret_surface->GetScaledSubRect(convert_params);
+                auto dest_rect = surface->GetScaledSubRect(convert_params);
+
+                ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect,
+                                   surface->texture.handle, dest_rect);
+
+                surface->invalid_regions.erase(convert_interval);
+                continue;
+            }
+        }
+
+        // Load data from 3DS memory
+        // Any cached surface holding newer data for this range is written back first
+        FlushRegion(params.addr, params.size);
+        surface->LoadGLBuffer(params.addr, params.end);
+        surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
+                                 draw_framebuffer.handle);
+        surface->invalid_regions.erase(params.GetInterval());
+    }
+}
+
+/// Writes every dirty region overlapping [addr, addr + size) back to memory and removes the
+/// written intervals from dirty_regions. Does nothing when size is 0. When flush_surface is not
+/// null, only regions owned by that surface are flushed. For size <= 8 the whole dirty entry is
+/// flushed instead of just its overlap with the requested range.
+void RasterizerCacheOpenGL::FlushRegion(PAddr addr, u64 size, Surface flush_surface) {
+    if (size == 0)
+        return;
+
+    const SurfaceInterval flush_interval(addr, addr + size);
+    SurfaceRegions flushed_intervals;
+
+    for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) {
+        // small sizes imply that this most likely comes from the cpu, flush the entire region
+        // the point is to avoid thousands of small writes every frame if the cpu decides to access
+        // that region, anything higher than 8 you're guaranteed it comes from a service
+        const auto interval = size <= 8 ? pair.first : pair.first & flush_interval;
+        auto& surface = pair.second;
+
+        if (flush_surface != nullptr && surface != flush_surface)
+            continue;
+
+        // Sanity check, this surface is the last one that marked this region dirty
+        ASSERT(surface->IsRegionValid(interval));
+
+        // Fill surfaces have no texture to read back; FlushGLBuffer writes their pattern
+        if (surface->type != SurfaceType::Fill) {
+            SurfaceParams params = surface->FromInterval(interval);
+            surface->DownloadGLTexture(surface->GetSubRect(params), read_framebuffer.handle,
+                                       draw_framebuffer.handle);
+        }
+        surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval));
+        flushed_intervals += interval;
+    }
+    // Reset dirty regions
+    dirty_regions -= flushed_intervals;
+}
+
+/// Flushes all dirty regions in the address range starting at 0 with size 0xFFFFFFFF.
+/// NOTE(review): if the interval built by FlushRegion is right-open, address 0xFFFFFFFF itself
+/// is not covered - confirm whether that matters.
+void RasterizerCacheOpenGL::FlushAll() {
+    FlushRegion(0, 0xFFFFFFFF);
+}
+
+/// Marks [addr, addr + size) as invalid in every cached surface except region_owner. Does
+/// nothing when size is 0. When region_owner is not null it becomes the owner of the range in
+/// dirty_regions and the range is marked valid in it; when it is null the range is removed
+/// from dirty_regions. With no owner and size <= 8, overlapping surfaces are flushed and
+/// removed from the cache instead of being marked invalid. Fill surfaces that end up fully
+/// invalid are removed as well. All surfaces queued in remove_surfaces are processed at the end.
+void RasterizerCacheOpenGL::InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner) {
+    if (size == 0)
+        return;
+
+    const SurfaceInterval invalid_interval(addr, addr + size);
+
+    if (region_owner != nullptr) {
+        ASSERT(region_owner->type != SurfaceType::Texture);
+        ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
+        // Surfaces can't have a gap
+        ASSERT(region_owner->width == region_owner->stride);
+        region_owner->invalid_regions.erase(invalid_interval);
+    }
+
+    for (auto& pair : RangeFromInterval(surface_cache, invalid_interval)) {
+        for (auto& cached_surface : pair.second) {
+            if (cached_surface == region_owner)
+                continue;
+
+            // If cpu is invalidating this region we want to remove it
+            // to (likely) mark the memory pages as uncached
+            if (region_owner == nullptr && size <= 8) {
+                FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
+                remove_surfaces.emplace(cached_surface);
+                continue;
+            }
+
+            const auto interval = cached_surface->GetInterval() & invalid_interval;
+            cached_surface->invalid_regions.insert(interval);
+
+            // Remove only "empty" fill surfaces to avoid destroying and recreating OGL textures
+            if (cached_surface->type == SurfaceType::Fill &&
+
cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } + } + } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch<MatchFlags::SubRect | MatchFlags::Invalid>( + surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) { + Surface surface = std::make_shared<CachedSurface>(); + static_cast<SurfaceParams&>(*surface) = params; + + surface->texture.Create(); + + surface->gl_buffer_size = 0; + surface->invalid_regions.insert(surface->GetInterval()); + AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format), + surface->GetScaledWidth(), surface->GetScaledHeight()); + + return surface; +} + +void RasterizerCacheOpenGL::RegisterSurface(const Surface& surface) { + if (surface->registered) { + return; + } + surface->registered = true; + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { + if (!surface->registered) { + return; + } + surface->registered = false; + UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +void RasterizerCacheOpenGL::UpdatePagesCachedCount(PAddr addr, u64 size, int delta) { + UNIMPLEMENTED(); +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h 
b/src/video_core/renderer_opengl/gl_rasterizer_cache.h new file mode 100644 index 000000000..17ce0fee7 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -0,0 +1,350 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <memory> +#include <set> +#include <tuple> +#ifdef __GNUC__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-local-typedefs" +#endif +#include <boost/icl/interval_map.hpp> +#include <boost/icl/interval_set.hpp> +#ifdef __GNUC__ +#pragma GCC diagnostic pop +#endif +#include <glad/glad.h> +#include "common/assert.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +struct CachedSurface; +using Surface = std::shared_ptr<CachedSurface>; +using SurfaceSet = std::set<Surface>; + +using SurfaceRegions = boost::icl::interval_set<PAddr>; +using SurfaceMap = boost::icl::interval_map<PAddr, Surface>; +using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet>; + +using SurfaceInterval = SurfaceCache::interval_type; +static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && + std::is_same<SurfaceMap::interval_type, SurfaceCache::interval_type>(), + "incorrect interval types"); + +using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; +using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; + +using PageMap = boost::icl::interval_map<u32, int>; + +enum class ScaleMatch { + Exact, // only accept same res scale + Upscale, // only allow higher scale than params + Ignore // accept every scaled res +}; + +struct SurfaceParams { + enum class PixelFormat { + // First 5 formats are shared between textures and color buffers + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + + // 
Texture-only formats + IA8 = 5, + RG8 = 6, + I8 = 7, + A8 = 8, + IA4 = 9, + I4 = 10, + A4 = 11, + ETC1 = 12, + ETC1A4 = 13, + + // Depth buffer-only formats + D16 = 14, + // gap + D24 = 16, + D24S8 = 17, + + Invalid = 255, + }; + + enum class SurfaceType { + Color = 0, + Texture = 1, + Depth = 2, + DepthStencil = 3, + Fill = 4, + Invalid = 5 + }; + + static constexpr unsigned int GetFormatBpp(PixelFormat format) { + constexpr std::array<unsigned int, 18> bpp_table = { + 32, // RGBA8 + 24, // RGB8 + 16, // RGB5A1 + 16, // RGB565 + 16, // RGBA4 + 16, // IA8 + 16, // RG8 + 8, // I8 + 8, // A8 + 8, // IA4 + 4, // I4 + 4, // A4 + 4, // ETC1 + 8, // ETC1A4 + 16, // D16 + 0, + 24, // D24 + 32, // D24S8 + }; + + assert(static_cast<size_t>(format) < bpp_table.size()); + return bpp_table[static_cast<size_t>(format)]; + } + unsigned int GetFormatBpp() const { + return GetFormatBpp(pixel_format); + } + + static bool CheckFormatsBlittable(PixelFormat pixel_format_a, PixelFormat pixel_format_b) { + SurfaceType a_type = GetFormatType(pixel_format_a); + SurfaceType b_type = GetFormatType(pixel_format_b); + + if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) && + (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) { + return true; + } + + if (a_type == SurfaceType::Depth && b_type == SurfaceType::Depth) { + return true; + } + + if (a_type == SurfaceType::DepthStencil && b_type == SurfaceType::DepthStencil) { + return true; + } + + return false; + } + + static constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { + if ((unsigned int)pixel_format < 5) { + return SurfaceType::Color; + } + + if ((unsigned int)pixel_format < 14) { + return SurfaceType::Texture; + } + + if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { + return SurfaceType::Depth; + } + + if (pixel_format == PixelFormat::D24S8) { + return SurfaceType::DepthStencil; + } + + return SurfaceType::Invalid; + } + + /// Update the params "size", "end" and 
"type" from the already set "addr", "width", "height" + /// and "pixel_format" + void UpdateParams() { + if (stride == 0) { + stride = width; + } + type = GetFormatType(pixel_format); + size = !is_tiled ? BytesInPixels(stride * (height - 1) + width) + : BytesInPixels(stride * 8 * (height / 8 - 1) + width * 8); + end = addr + size; + } + + SurfaceInterval GetInterval() const { + return SurfaceInterval::right_open(addr, end); + } + + // Returns the outer rectangle containing "interval" + SurfaceParams FromInterval(SurfaceInterval interval) const; + + SurfaceInterval GetSubRectInterval(MathUtil::Rectangle<u32> unscaled_rect) const; + + // Returns the region of the biggest valid rectange within interval + SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; + + u32 GetScaledWidth() const { + return width * res_scale; + } + + u32 GetScaledHeight() const { + return height * res_scale; + } + + MathUtil::Rectangle<u32> GetRect() const { + return {0, height, width, 0}; + } + + MathUtil::Rectangle<u32> GetScaledRect() const { + return {0, GetScaledHeight(), GetScaledWidth(), 0}; + } + + u64 PixelsInBytes(u64 size) const { + return size * CHAR_BIT / GetFormatBpp(pixel_format); + } + + u64 BytesInPixels(u64 pixels) const { + return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; + } + + bool ExactMatch(const SurfaceParams& other_surface) const; + bool CanSubRect(const SurfaceParams& sub_surface) const; + bool CanExpand(const SurfaceParams& expanded_surface) const; + bool CanTexCopy(const SurfaceParams& texcopy_params) const; + + MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; + MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; + + PAddr addr = 0; + PAddr end = 0; + u64 size = 0; + + u32 width = 0; + u32 height = 0; + u32 stride = 0; + u16 res_scale = 1; + + bool is_tiled = false; + PixelFormat pixel_format = PixelFormat::Invalid; + SurfaceType type = SurfaceType::Invalid; +}; + +struct 
CachedSurface : SurfaceParams { + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; + bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + bool IsSurfaceFullyInvalid() const { + return (invalid_regions & GetInterval()) == SurfaceRegions(GetInterval()); + } + + bool registered = false; + SurfaceRegions invalid_regions; + + u64 fill_size = 0; /// Number of bytes to read from fill_data + std::array<u8, 4> fill_data; + + OGLTexture texture; + + static constexpr unsigned int GetGLBytesPerPixel(PixelFormat format) { + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + return format == PixelFormat::Invalid + ? 0 + : (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) + ? 4 + : SurfaceParams::GetFormatBpp(format) / 8; + } + + std::unique_ptr<u8[]> gl_buffer; + size_t gl_buffer_size = 0; + + // Read/Write data in 3DS memory to/from gl_buffer + void LoadGLBuffer(PAddr load_start, PAddr load_end); + void FlushGLBuffer(PAddr flush_start, PAddr flush_end); + + // Upload/Download data in gl_buffer in/to this surface's texture + void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); + void DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, + GLuint draw_fb_handle); +}; + +class RasterizerCacheOpenGL : NonCopyable { +public: + RasterizerCacheOpenGL(); + ~RasterizerCacheOpenGL(); + + /// Blit one surface's texture to another + bool BlitSurfaces(const Surface& src_surface, const MathUtil::Rectangle<u32>& src_rect, + const Surface& dst_surface, const MathUtil::Rectangle<u32>& dst_rect); + + void ConvertD24S8toABGR(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, + GLuint dst_tex, const MathUtil::Rectangle<u32>& dst_rect); + + /// Copy one 
surface's region to another + void CopySurface(const Surface& src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval); + + /// Load a texture from 3DS memory to OpenGL and cache it (if not already cached) + Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from + /// 3DS memory to OpenGL and caches it (if not already cached) + SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create); + + /// Get a surface based on the texture configuration + Surface GetTextureSurface(const void* config); + + /// Get the color and depth surfaces based on the framebuffer configuration + SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const MathUtil::Rectangle<s32>& viewport_rect); + + /// Get a surface that matches the fill config + Surface GetFillSurface(const void* config); + + /// Get a surface that matches a "texture copy" display transfer config + SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); + + /// Write any cached resources overlapping the region back to memory (if dirty) + void FlushRegion(PAddr addr, u64 size, Surface flush_surface = nullptr); + + /// Mark region as being invalidated by region_owner (nullptr if 3DS memory) + void InvalidateRegion(PAddr addr, u64 size, const Surface& region_owner); + + /// Flush all cached resources tracked by this cache manager + void FlushAll(); + +private: + void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); + + /// Update surface's texture for given region when necessary + void ValidateSurface(const Surface& surface, PAddr addr, u64 size); + + /// Create a new surface + Surface CreateSurface(const SurfaceParams& params); + + /// Register surface into the cache + void RegisterSurface(const Surface& surface); + + /// Remove surface 
from the cache + void UnregisterSurface(const Surface& surface); + + /// Increase/decrease the number of surface in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u64 size, int delta); + + SurfaceCache surface_cache; + PageMap cached_pages; + SurfaceMap dirty_regions; + SurfaceSet remove_surfaces; + + OGLFramebuffer read_framebuffer; + OGLFramebuffer draw_framebuffer; + + OGLVertexArray attributeless_vao; + OGLBuffer d24s8_abgr_buffer; + GLsizeiptr d24s8_abgr_buffer_size; + OGLShader d24s8_abgr_shader; + GLint d24s8_abgr_tbo_size_u_id; + GLint d24s8_abgr_viewport_u_id; +}; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 13301ec9f..7da5e74d1 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -36,7 +36,7 @@ public: if (handle == 0) return; glDeleteTextures(1, &handle); - OpenGLState::ResetTexture(handle); + OpenGLState::GetCurState().ResetTexture(handle).Apply(); handle = 0; } @@ -69,7 +69,7 @@ public: if (handle == 0) return; glDeleteSamplers(1, &handle); - OpenGLState::ResetSampler(handle); + OpenGLState::GetCurState().ResetSampler(handle).Apply(); handle = 0; } @@ -91,10 +91,13 @@ public: } /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* frag_shader) { + void Create(const char* vert_shader, const char* geo_shader, const char* frag_shader, + const std::vector<const char*>& feedback_vars = {}, + bool separable_program = false) { if (handle != 0) return; - handle = GLShader::LoadProgram(vert_shader, frag_shader); + handle = GLShader::LoadProgram(vert_shader, geo_shader, frag_shader, feedback_vars, + separable_program); } /// Deletes the internal OpenGL resource @@ -102,7 +105,40 @@ public: if (handle == 0) return; glDeleteProgram(handle); - OpenGLState::ResetProgram(handle); + 
OpenGLState::GetCurState().ResetProgram(handle).Apply(); + handle = 0; + } + + GLuint handle = 0; +}; + +class OGLPipeline : private NonCopyable { +public: + OGLPipeline() = default; + OGLPipeline(OGLPipeline&& o) { + handle = std::exchange<GLuint>(o.handle, 0); + } + ~OGLPipeline() { + Release(); + } + OGLPipeline& operator=(OGLPipeline&& o) { + handle = std::exchange<GLuint>(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create() { + if (handle != 0) + return; + glGenProgramPipelines(1, &handle); + } + + /// Deletes the internal OpenGL resource + void Release() { + if (handle == 0) + return; + glDeleteProgramPipelines(1, &handle); + OpenGLState::GetCurState().ResetPipeline(handle).Apply(); handle = 0; } @@ -135,13 +171,46 @@ public: if (handle == 0) return; glDeleteBuffers(1, &handle); - OpenGLState::ResetBuffer(handle); + OpenGLState::GetCurState().ResetBuffer(handle).Apply(); handle = 0; } GLuint handle = 0; }; +class OGLSync : private NonCopyable { +public: + OGLSync() = default; + + OGLSync(OGLSync&& o) : handle(std::exchange(o.handle, nullptr)) {} + + ~OGLSync() { + Release(); + } + OGLSync& operator=(OGLSync&& o) { + Release(); + handle = std::exchange(o.handle, nullptr); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create() { + if (handle != 0) + return; + handle = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + + /// Deletes the internal OpenGL resource + void Release() { + if (handle == 0) + return; + glDeleteSync(handle); + handle = 0; + } + + GLsync handle = 0; +}; + class OGLVertexArray : private NonCopyable { public: OGLVertexArray() = default; @@ -168,7 +237,7 @@ public: if (handle == 0) return; glDeleteVertexArrays(1, &handle); - OpenGLState::ResetVertexArray(handle); + OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); handle = 0; } @@ -201,7 +270,7 @@ public: if (handle == 0) return; glDeleteFramebuffers(1, &handle); - 
OpenGLState::ResetFramebuffer(handle); + OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); handle = 0; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp new file mode 100644 index 000000000..0e0ef18cc --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -0,0 +1,58 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <string> +#include <queue> +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" + +namespace Maxwell3D { +namespace Shader { +namespace Decompiler { + +constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH; + +class Impl { +public: + Impl(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, u32 main_offset, + const std::function<std::string(u32)>& inputreg_getter, + const std::function<std::string(u32)>& outputreg_getter, bool sanitize_mul, + const std::string& emit_cb, const std::string& setemit_cb) + : program_code(program_code), swizzle_data(swizzle_data), main_offset(main_offset), + inputreg_getter(inputreg_getter), outputreg_getter(outputreg_getter), + sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} + + std::string Decompile() { + UNIMPLEMENTED(); + return {}; + } + +private: + const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code; + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data; + u32 main_offset; + const std::function<std::string(u32)>& inputreg_getter; + const std::function<std::string(u32)>& outputreg_getter; + bool sanitize_mul; + const std::string& emit_cb; + const std::string& setemit_cb; +}; + +std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, + 
u32 main_offset, + const std::function<std::string(u32)>& inputreg_getter, + const std::function<std::string(u32)>& outputreg_getter, + bool sanitize_mul, const std::string& emit_cb, + const std::string& setemit_cb) { + Impl impl(program_code, swizzle_data, main_offset, inputreg_getter, outputreg_getter, + sanitize_mul, emit_cb, setemit_cb); + return impl.Decompile(); +} + +} // namespace Decompiler +} // namespace Shader +} // namespace Maxwell3D diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h new file mode 100644 index 000000000..02ebfcbe8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -0,0 +1,27 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <array> +#include <functional> +#include <string> +#include "common/common_types.h" + +namespace Maxwell3D { +namespace Shader { +namespace Decompiler { + +constexpr size_t MAX_PROGRAM_CODE_LENGTH{0x100000}; +constexpr size_t MAX_SWIZZLE_DATA_LENGTH{0x100000}; + +std::string DecompileProgram(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>& program_code, + const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>& swizzle_data, + u32 main_offset, + const std::function<std::string(u32)>& inputreg_getter, + const std::function<std::string(u32)>& outputreg_getter, + bool sanitize_mul, const std::string& emit_cb = "", + const std::string& setemit_cb = ""); + +} // namespace Decompiler +} // namespace Shader +} // namespace Maxwell3D diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp new file mode 100644 index 000000000..f242bce1d --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -0,0 +1,20 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/assert.h" +#include "video_core/renderer_opengl/gl_shader_gen.h" + +namespace GLShader { + +std::string GenerateVertexShader(const MaxwellVSConfig& config) { + UNIMPLEMENTED(); + return {}; +} + +std::string GenerateFragmentShader(const MaxwellFSConfig& config) { + UNIMPLEMENTED(); + return {}; +} + +} // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h new file mode 100644 index 000000000..5101e7d30 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -0,0 +1,66 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstring> +#include <string> +#include <type_traits> +#include "common/hash.h" + +namespace GLShader { + +enum Attributes { + ATTRIBUTE_POSITION, + ATTRIBUTE_COLOR, + ATTRIBUTE_TEXCOORD0, + ATTRIBUTE_TEXCOORD1, + ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, +}; + +struct MaxwellShaderConfigCommon { + explicit MaxwellShaderConfigCommon(){}; +}; + +struct MaxwellVSConfig : MaxwellShaderConfigCommon { + explicit MaxwellVSConfig() : MaxwellShaderConfigCommon() {} + + bool operator==(const MaxwellVSConfig& o) const { + return std::memcmp(this, &o, sizeof(MaxwellVSConfig)) == 0; + }; +}; + +struct MaxwellFSConfig : MaxwellShaderConfigCommon { + explicit MaxwellFSConfig() : MaxwellShaderConfigCommon() {} + + bool operator==(const MaxwellFSConfig& o) const { + return std::memcmp(this, &o, sizeof(MaxwellFSConfig)) == 0; + }; +}; + +std::string GenerateVertexShader(const MaxwellVSConfig& config); +std::string GenerateFragmentShader(const MaxwellFSConfig& config); + +} // namespace GLShader + +namespace std { + +template <> +struct hash<GLShader::MaxwellVSConfig> { + size_t operator()(const GLShader::MaxwellVSConfig& k) const { + return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellVSConfig)); + } 
+}; + +template <> +struct hash<GLShader::MaxwellFSConfig> { + size_t operator()(const GLShader::MaxwellFSConfig& k) const { + return Common::ComputeHash64(&k, sizeof(GLShader::MaxwellFSConfig)); + } +}; + +} // namespace std diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 4da241d83..a3ba16761 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -10,53 +10,85 @@ namespace GLShader { -GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { - +GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, + const char* fragment_shader, const std::vector<const char*>& feedback_vars, + bool separable_program) { // Create the shaders - GLuint vertex_shader_id = glCreateShader(GL_VERTEX_SHADER); - GLuint fragment_shader_id = glCreateShader(GL_FRAGMENT_SHADER); + GLuint vertex_shader_id = vertex_shader ? glCreateShader(GL_VERTEX_SHADER) : 0; + GLuint geometry_shader_id = geometry_shader ? glCreateShader(GL_GEOMETRY_SHADER) : 0; + GLuint fragment_shader_id = fragment_shader ? 
glCreateShader(GL_FRAGMENT_SHADER) : 0; GLint result = GL_FALSE; int info_log_length; - // Compile Vertex Shader - LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); - - glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); - glCompileShader(vertex_shader_id); - - // Check Vertex Shader - glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); - - if (info_log_length > 1) { - std::vector<char> vertex_shader_error(info_log_length); - glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", &vertex_shader_error[0]); + if (vertex_shader) { + // Compile Vertex Shader + LOG_DEBUG(Render_OpenGL, "Compiling vertex shader..."); + + glShaderSource(vertex_shader_id, 1, &vertex_shader, nullptr); + glCompileShader(vertex_shader_id); + + // Check Vertex Shader + glGetShaderiv(vertex_shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(vertex_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::vector<char> vertex_shader_error(info_log_length); + glGetShaderInfoLog(vertex_shader_id, info_log_length, nullptr, &vertex_shader_error[0]); + if (result == GL_TRUE) { + LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); + } else { + LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", + &vertex_shader_error[0]); + } } } - // Compile Fragment Shader - LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); - - glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); - glCompileShader(fragment_shader_id); - - // Check Fragment Shader - glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); - glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + if (geometry_shader) { + // Compile Geometry Shader + 
LOG_DEBUG(Render_OpenGL, "Compiling geometry shader..."); + + glShaderSource(geometry_shader_id, 1, &geometry_shader, nullptr); + glCompileShader(geometry_shader_id); + + // Check Geometry Shader + glGetShaderiv(geometry_shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(geometry_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::vector<char> geometry_shader_error(info_log_length); + glGetShaderInfoLog(geometry_shader_id, info_log_length, nullptr, + &geometry_shader_error[0]); + if (result == GL_TRUE) { + LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); + } else { + LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s", + &geometry_shader_error[0]); + } + } + } - if (info_log_length > 1) { - std::vector<char> fragment_shader_error(info_log_length); - glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, &fragment_shader_error[0]); - if (result == GL_TRUE) { - LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); - } else { - LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); + if (fragment_shader) { + // Compile Fragment Shader + LOG_DEBUG(Render_OpenGL, "Compiling fragment shader..."); + + glShaderSource(fragment_shader_id, 1, &fragment_shader, nullptr); + glCompileShader(fragment_shader_id); + + // Check Fragment Shader + glGetShaderiv(fragment_shader_id, GL_COMPILE_STATUS, &result); + glGetShaderiv(fragment_shader_id, GL_INFO_LOG_LENGTH, &info_log_length); + + if (info_log_length > 1) { + std::vector<char> fragment_shader_error(info_log_length); + glGetShaderInfoLog(fragment_shader_id, info_log_length, nullptr, + &fragment_shader_error[0]); + if (result == GL_TRUE) { + LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); + } else { + LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", + &fragment_shader_error[0]); + } } } @@ -64,8 +96,25 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { 
LOG_DEBUG(Render_OpenGL, "Linking program..."); GLuint program_id = glCreateProgram(); - glAttachShader(program_id, vertex_shader_id); - glAttachShader(program_id, fragment_shader_id); + if (vertex_shader) { + glAttachShader(program_id, vertex_shader_id); + } + if (geometry_shader) { + glAttachShader(program_id, geometry_shader_id); + } + if (fragment_shader) { + glAttachShader(program_id, fragment_shader_id); + } + + if (!feedback_vars.empty()) { + auto varyings = feedback_vars; + glTransformFeedbackVaryings(program_id, static_cast<GLsizei>(feedback_vars.size()), + &varyings[0], GL_INTERLEAVED_ATTRIBS); + } + + if (separable_program) { + glProgramParameteri(program_id, GL_PROGRAM_SEPARABLE, GL_TRUE); + } glLinkProgram(program_id); @@ -85,13 +134,30 @@ GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader) { // If the program linking failed at least one of the shaders was probably bad if (result == GL_FALSE) { - LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); - LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); + if (vertex_shader) { + LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); + } + if (geometry_shader) { + LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); + } + if (fragment_shader) { + LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); + } } ASSERT_MSG(result == GL_TRUE, "Shader not linked"); - glDeleteShader(vertex_shader_id); - glDeleteShader(fragment_shader_id); + if (vertex_shader) { + glDetachShader(program_id, vertex_shader_id); + glDeleteShader(vertex_shader_id); + } + if (geometry_shader) { + glDetachShader(program_id, geometry_shader_id); + glDeleteShader(geometry_shader_id); + } + if (fragment_shader) { + glDetachShader(program_id, fragment_shader_id); + glDeleteShader(fragment_shader_id); + } return program_id; } diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index a4bcffdfa..fc7b5e080 
100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -4,6 +4,7 @@ #pragma once +#include <vector> #include <glad/glad.h> namespace GLShader { @@ -11,9 +12,12 @@ namespace GLShader { /** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) * @param vertex_shader String of the GLSL vertex shader program + * @param geometry_shader String of the GLSL geometry shader program * @param fragment_shader String of the GLSL fragment shader program * @returns Handle of the newly created OpenGL shader object */ -GLuint LoadProgram(const char* vertex_shader, const char* fragment_shader); +GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, + const char* fragment_shader, const std::vector<const char*>& feedback_vars = {}, + bool separable_program = false); } // namespace GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 5770ae08f..1d396728b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -33,7 +33,7 @@ OpenGLState::OpenGLState() { stencil.action_depth_pass = GL_KEEP; stencil.action_stencil_fail = GL_KEEP; - blend.enabled = false; + blend.enabled = true; blend.rgb_equation = GL_FUNC_ADD; blend.a_equation = GL_FUNC_ADD; blend.src_rgb_func = GL_ONE; @@ -68,6 +68,18 @@ OpenGLState::OpenGLState() { draw.vertex_buffer = 0; draw.uniform_buffer = 0; draw.shader_program = 0; + draw.program_pipeline = 0; + + scissor.enabled = false; + scissor.x = 0; + scissor.y = 0; + scissor.width = 0; + scissor.height = 0; + + viewport.x = 0; + viewport.y = 0; + viewport.width = 0; + viewport.height = 0; clip_distance = {}; } @@ -148,9 +160,6 @@ void OpenGLState::Apply() const { if (blend.enabled != cur_state.blend.enabled) { if (blend.enabled) { glEnable(GL_BLEND); - - cur_state.logic_op = GL_COPY; - glLogicOp(cur_state.logic_op); 
glDisable(GL_COLOR_LOGIC_OP); } else { glDisable(GL_BLEND); @@ -196,7 +205,7 @@ void OpenGLState::Apply() const { // Lighting LUTs if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { glActiveTexture(TextureUnits::LightingLUT.Enum()); - glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer); + glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer); } // Fog LUT @@ -263,6 +272,31 @@ void OpenGLState::Apply() const { glUseProgram(draw.shader_program); } + // Program pipeline + if (draw.program_pipeline != cur_state.draw.program_pipeline) { + glBindProgramPipeline(draw.program_pipeline); + } + + // Scissor test + if (scissor.enabled != cur_state.scissor.enabled) { + if (scissor.enabled) { + glEnable(GL_SCISSOR_TEST); + } else { + glDisable(GL_SCISSOR_TEST); + } + } + + if (scissor.x != cur_state.scissor.x || scissor.y != cur_state.scissor.y || + scissor.width != cur_state.scissor.width || scissor.height != cur_state.scissor.height) { + glScissor(scissor.x, scissor.y, scissor.width, scissor.height); + } + + if (viewport.x != cur_state.viewport.x || viewport.y != cur_state.viewport.y || + viewport.width != cur_state.viewport.width || + viewport.height != cur_state.viewport.height) { + glViewport(viewport.x, viewport.y, viewport.width, viewport.height); + } + // Clip distance for (size_t i = 0; i < clip_distance.size(); ++i) { if (clip_distance[i] != cur_state.clip_distance[i]) { @@ -277,62 +311,75 @@ void OpenGLState::Apply() const { cur_state = *this; } -void OpenGLState::ResetTexture(GLuint handle) { - for (auto& unit : cur_state.texture_units) { +OpenGLState& OpenGLState::ResetTexture(GLuint handle) { + for (auto& unit : texture_units) { if (unit.texture_2d == handle) { unit.texture_2d = 0; } } - if (cur_state.lighting_lut.texture_buffer == handle) - cur_state.lighting_lut.texture_buffer = 0; - if (cur_state.fog_lut.texture_buffer == handle) - cur_state.fog_lut.texture_buffer = 0; - if 
(cur_state.proctex_noise_lut.texture_buffer == handle) - cur_state.proctex_noise_lut.texture_buffer = 0; - if (cur_state.proctex_color_map.texture_buffer == handle) - cur_state.proctex_color_map.texture_buffer = 0; - if (cur_state.proctex_alpha_map.texture_buffer == handle) - cur_state.proctex_alpha_map.texture_buffer = 0; - if (cur_state.proctex_lut.texture_buffer == handle) - cur_state.proctex_lut.texture_buffer = 0; - if (cur_state.proctex_diff_lut.texture_buffer == handle) - cur_state.proctex_diff_lut.texture_buffer = 0; + if (lighting_lut.texture_buffer == handle) + lighting_lut.texture_buffer = 0; + if (fog_lut.texture_buffer == handle) + fog_lut.texture_buffer = 0; + if (proctex_noise_lut.texture_buffer == handle) + proctex_noise_lut.texture_buffer = 0; + if (proctex_color_map.texture_buffer == handle) + proctex_color_map.texture_buffer = 0; + if (proctex_alpha_map.texture_buffer == handle) + proctex_alpha_map.texture_buffer = 0; + if (proctex_lut.texture_buffer == handle) + proctex_lut.texture_buffer = 0; + if (proctex_diff_lut.texture_buffer == handle) + proctex_diff_lut.texture_buffer = 0; + return *this; } -void OpenGLState::ResetSampler(GLuint handle) { - for (auto& unit : cur_state.texture_units) { +OpenGLState& OpenGLState::ResetSampler(GLuint handle) { + for (auto& unit : texture_units) { if (unit.sampler == handle) { unit.sampler = 0; } } + return *this; +} + +OpenGLState& OpenGLState::ResetProgram(GLuint handle) { + if (draw.shader_program == handle) { + draw.shader_program = 0; + } + return *this; } -void OpenGLState::ResetProgram(GLuint handle) { - if (cur_state.draw.shader_program == handle) { - cur_state.draw.shader_program = 0; +OpenGLState& OpenGLState::ResetPipeline(GLuint handle) { + if (draw.program_pipeline == handle) { + draw.program_pipeline = 0; } + return *this; } -void OpenGLState::ResetBuffer(GLuint handle) { - if (cur_state.draw.vertex_buffer == handle) { - cur_state.draw.vertex_buffer = 0; +OpenGLState& 
OpenGLState::ResetBuffer(GLuint handle) { + if (draw.vertex_buffer == handle) { + draw.vertex_buffer = 0; } - if (cur_state.draw.uniform_buffer == handle) { - cur_state.draw.uniform_buffer = 0; + if (draw.uniform_buffer == handle) { + draw.uniform_buffer = 0; } + return *this; } -void OpenGLState::ResetVertexArray(GLuint handle) { - if (cur_state.draw.vertex_array == handle) { - cur_state.draw.vertex_array = 0; +OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { + if (draw.vertex_array == handle) { + draw.vertex_array = 0; } + return *this; } -void OpenGLState::ResetFramebuffer(GLuint handle) { - if (cur_state.draw.read_framebuffer == handle) { - cur_state.draw.read_framebuffer = 0; +OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { + if (draw.read_framebuffer == handle) { + draw.read_framebuffer = 0; } - if (cur_state.draw.draw_framebuffer == handle) { - cur_state.draw.draw_framebuffer = 0; + if (draw.draw_framebuffer == handle) { + draw.draw_framebuffer = 0; } + return *this; } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 437fe34c4..940575dfa 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -122,27 +122,44 @@ public: GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING GLuint shader_program; // GL_CURRENT_PROGRAM + GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING } draw; + struct { + bool enabled; // GL_SCISSOR_TEST + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } scissor; + + struct { + GLint x; + GLint y; + GLsizei width; + GLsizei height; + } viewport; + std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE OpenGLState(); /// Get the currently active OpenGL state - static const OpenGLState& GetCurState() { + static OpenGLState GetCurState() { return cur_state; } /// Apply this state as the current OpenGL state void Apply() const; - /// Resets and unbinds any 
references to the given resource in the current OpenGL state - static void ResetTexture(GLuint handle); - static void ResetSampler(GLuint handle); - static void ResetProgram(GLuint handle); - static void ResetBuffer(GLuint handle); - static void ResetVertexArray(GLuint handle); - static void ResetFramebuffer(GLuint handle); + /// Resets any references to the given resource + OpenGLState& ResetTexture(GLuint handle); + OpenGLState& ResetSampler(GLuint handle); + OpenGLState& ResetProgram(GLuint handle); + OpenGLState& ResetPipeline(GLuint handle); + OpenGLState& ResetBuffer(GLuint handle); + OpenGLState& ResetVertexArray(GLuint handle); + OpenGLState& ResetFramebuffer(GLuint handle); private: static OpenGLState cur_state; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp new file mode 100644 index 000000000..a2713e9f0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -0,0 +1,182 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <deque> +#include <vector> +#include "common/alignment.h" +#include "common/assert.h" +#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" + +class OrphanBuffer : public OGLStreamBuffer { +public: + explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {} + ~OrphanBuffer() override; + +private: + void Create(size_t size, size_t sync_subdivide) override; + void Release() override; + + std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; + void Unmap() override; + + std::vector<u8> data; +}; + +class StorageBuffer : public OGLStreamBuffer { +public: + explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {} + ~StorageBuffer() override; + +private: + void Create(size_t size, size_t sync_subdivide) override; + void Release() override; + + std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override; + void Unmap() override; + + struct Fence { + OGLSync sync; + size_t offset; + }; + std::deque<Fence> head; + std::deque<Fence> tail; + + u8* mapped_ptr; +}; + +OGLStreamBuffer::OGLStreamBuffer(GLenum target) { + gl_target = target; +} + +GLuint OGLStreamBuffer::GetHandle() const { + return gl_buffer.handle; +} + +std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) { + if (storage_buffer) { + return std::make_unique<StorageBuffer>(target); + } + return std::make_unique<OrphanBuffer>(target); +} + +OrphanBuffer::~OrphanBuffer() { + Release(); +} + +void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) { + buffer_pos = 0; + buffer_size = size; + data.resize(buffer_size); + + if (gl_buffer.handle == 0) { + gl_buffer.Create(); + glBindBuffer(gl_target, gl_buffer.handle); + } + + glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW); +} + +void OrphanBuffer::Release() { + gl_buffer.Release(); +} + +std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) { + 
buffer_pos = Common::AlignUp(buffer_pos, alignment); + + if (buffer_pos + size > buffer_size) { + Create(std::max(buffer_size, size), 0); + } + + mapped_size = size; + return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos)); +} + +void OrphanBuffer::Unmap() { + glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos), + static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]); + buffer_pos += mapped_size; +} + +StorageBuffer::~StorageBuffer() { + Release(); +} + +void StorageBuffer::Create(size_t size, size_t sync_subdivide) { + if (gl_buffer.handle != 0) + return; + + buffer_pos = 0; + buffer_size = size; + buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1); + + gl_buffer.Create(); + glBindBuffer(gl_target, gl_buffer.handle); + + glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); + mapped_ptr = reinterpret_cast<u8*>( + glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size), + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT)); +} + +void StorageBuffer::Release() { + if (gl_buffer.handle == 0) + return; + + glUnmapBuffer(gl_target); + + gl_buffer.Release(); + head.clear(); + tail.clear(); +} + +std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) { + ASSERT(size <= buffer_size); + + OGLSync sync; + + buffer_pos = Common::AlignUp(buffer_pos, alignment); + size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide); + + if (!head.empty() && + (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) { + ASSERT(head.back().sync.handle == 0); + head.back().sync.Create(); + } + + if (buffer_pos + size > buffer_size) { + if (!tail.empty()) { + std::swap(sync, tail.back().sync); + tail.clear(); + } + std::swap(tail, head); + buffer_pos = 0; + effective_offset = 0; + } + + while (!tail.empty() && buffer_pos + size > tail.front().offset) { + std::swap(sync, tail.front().sync); + 
tail.pop_front(); + } + + if (sync.handle != 0) { + glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); + sync.Release(); + } + + if (head.empty() || effective_offset > head.back().offset) { + head.emplace_back(); + head.back().offset = effective_offset; + } + + mapped_size = size; + return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos)); +} + +void StorageBuffer::Unmap() { + glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos), + static_cast<GLsizeiptr>(mapped_size)); + buffer_pos += mapped_size; +} diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h new file mode 100644 index 000000000..4bc2f52e0 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -0,0 +1,34 @@ +// Copyright 2018 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <glad/glad.h> +#include "common/common_types.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +class OGLStreamBuffer : private NonCopyable { +public: + explicit OGLStreamBuffer(GLenum target); + virtual ~OGLStreamBuffer() = default; + +public: + static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target); + + virtual void Create(size_t size, size_t sync_subdivide) = 0; + virtual void Release() {} + + GLuint GetHandle() const; + + virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0; + virtual void Unmap() = 0; + +protected: + OGLBuffer gl_buffer; + GLenum gl_target; + + size_t buffer_pos = 0; + size_t buffer_size = 0; + size_t buffer_sync_subdivide = 0; + size_t mapped_size = 0; +}; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7f921fa32..65d38ade5 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ 
b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -318,7 +318,7 @@ void RendererOpenGL::InitOpenGLObjects() { 0.0f); // Link shaders and get variable locations - shader.Create(vertex_shader, fragment_shader); + shader.Create(vertex_shader, nullptr, fragment_shader); state.draw.shader_program = shader.handle; state.Apply(); uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix"); |