diff options
Diffstat (limited to 'src/video_core')
22 files changed, 209 insertions, 163 deletions
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index bc80661d8..72e2a33d5 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -4,6 +4,9 @@ #pragma once +#include <array> +#include <optional> + #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -16,7 +19,7 @@ enum class OutputTopology : u32 { TriangleStrip = 7, }; -enum class AttributeUse : u8 { +enum class PixelImap : u8 { Unused = 0, Constant = 1, Perspective = 2, @@ -24,7 +27,7 @@ enum class AttributeUse : u8 { }; // Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html struct Header { union { BitField<0, 5, u32> sph_type; @@ -59,8 +62,8 @@ struct Header { union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<24, 4, u32> reserved; - BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. } common4{}; union { @@ -93,17 +96,20 @@ struct Header { struct { INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB + union { - BitField<0, 2, AttributeUse> x; - BitField<2, 2, AttributeUse> y; - BitField<4, 2, AttributeUse> w; - BitField<6, 2, AttributeUse> z; + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; u8 raw; } imap_generic_vector[32]; + INSERT_UNION_PADDING_BYTES(2); // ImapColor INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] INSERT_UNION_PADDING_BYTES(2); // ImapReserved + struct { u32 target; union { @@ -112,31 +118,30 @@ struct Header { BitField<2, 30, u32> reserved; }; } omap; + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { const u32 bit = render_target * 4 + component; return omap.target & (1 << bit); } - AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { - return static_cast<AttributeUse>( - (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); - } - AttributeUse GetAttributeUse(u32 attribute) const { - AttributeUse result = AttributeUse::Unused; - for (u32 i = 0; i < 4; i++) { - const auto index = GetAttributeIndexUse(attribute, i); - if (index == AttributeUse::Unused) { - continue; - } - if (result == AttributeUse::Unused || result == index) { - result = index; + + PixelImap GetPixelImap(u32 attribute) const { + const auto get_index = [this, attribute](u32 index) { + return static_cast<PixelImap>( + (imap_generic_vector[attribute].raw >> (index * 2)) & 3); + }; + + std::optional<PixelImap> result; + for (u32 component = 0; component < 4; ++component) { + const PixelImap index = get_index(component); + if (index == PixelImap::Unused) { continue; } - LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); - if (index == AttributeUse::Perspective) { - result = index; + if (result && result != index) { + LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); } + result = index; } - return result; + return result.value_or(PixelImap::Unused); } } ps; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e8f763ce9..8acf2eda2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -7,6 +7,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -16,14 +17,15 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/video_core.h" namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) - : system{system}, renderer{renderer}, is_async{is_async} { - auto& rasterizer{renderer.Rasterizer()}; +GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) + : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { + auto& rasterizer{renderer->Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); @@ -137,7 +139,7 @@ u64 GPU::GetTicks() const { } void GPU::FlushCommands() { - renderer.Rasterizer().FlushCommands(); + renderer->Rasterizer().FlushCommands(); } // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 64acb17df..ced9d7e28 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -25,8 +25,11 @@ inline u8* FromCacheAddr(CacheAddr cache_addr) { } namespace Core { -class System; +namespace Frontend { +class EmuWindow; } +class System; +} // namespace Core namespace VideoCore { class RendererBase; @@ -129,7 +132,8 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); + explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + bool is_async); virtual ~GPU(); @@ -174,6 +178,14 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + VideoCore::RendererBase& Renderer() { + return *renderer; + } + + const VideoCore::RendererBase& Renderer() const { + return *renderer; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -287,7 +299,7 @@ private: protected: std::unique_ptr<Tegra::DmaPusher> dma_pusher; Core::System& system; - VideoCore::RendererBase& renderer; + std::unique_ptr<VideoCore::RendererBase> renderer; private: std::unique_ptr<Tegra::MemoryManager> memory_manager; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 04222d060..925be8d7b 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,13 +10,16 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer, true), gpu_thread{system} {} +GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context) + : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)), + cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(renderer, *dma_pusher); + cpu_context->MakeCurrent(); + gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); } void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 1241ade1d..265c62758 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -7,6 +7,10 @@ #include "video_core/gpu.h" #include "video_core/gpu_thread.h" +namespace Core::Frontend { +class GraphicsContext; +} + namespace VideoCore { class RendererBase; } // namespace VideoCore @@ -16,7 +20,8 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context); ~GPUAsynch() override; void Start() override; @@ -32,6 +37,8 @@ protected: private: GPUThread::ThreadManager gpu_thread; + std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; + std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index d48221077..bd5278a5c 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,12 +7,15 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer, false) {} +GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context) + : GPU(system, std::move(renderer), false), context{std::move(context)} {} GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() {} +void GPUSynch::Start() { + context->MakeCurrent(); +} void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); @@ -20,19 +23,19 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { } void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - renderer.SwapBuffers(framebuffer); + renderer->SwapBuffers(framebuffer); } void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().FlushRegion(addr, size); + renderer->Rasterizer().FlushRegion(addr, size); } void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().InvalidateRegion(addr, size); + renderer->Rasterizer().InvalidateRegion(addr, size); } void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); + renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); } } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index c71baee89..866a94c8c 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -6,6 +6,10 @@ #include "video_core/gpu.h" +namespace Core::Frontend { +class GraphicsContext; +} + namespace VideoCore { class RendererBase; } // namespace VideoCore @@ -15,7 +19,8 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context); ~GPUSynch() override; void Start() override; @@ -29,6 +34,9 @@ public: protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, [[maybe_unused]] u32 value) const override {} + +private: + std::unique_ptr<Core::Frontend::GraphicsContext> context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index b1088af3d..270c7ae0d 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/frontend/scope_acquire_context.h" +#include "core/frontend/emu_window.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" @@ -14,8 +14,8 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread -static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, - SynchState& state) { +static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher, SynchState& state) { MicroProfileOnThreadCreate("GpuThread"); // Wait for first GPU command before acquiring the window context @@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p return; } - Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()}; + auto current_context = context.Acquire(); CommandDataContainer next; while (state.is_running) { @@ -62,8 +62,11 @@ ThreadManager::~ThreadManager() { thread.join(); } -void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { - thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; +void ThreadManager::StartThread(VideoCore::RendererBase& renderer, + Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher) { + thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), + std::ref(state)}; } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 882e2d9c7..be36c580e 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,7 +10,6 @@ #include <optional> #include <thread> #include <variant> - #include "common/threadsafe_queue.h" #include "video_core/gpu.h" @@ -20,6 +19,9 @@ class DmaPusher; } // namespace Tegra namespace Core { +namespace Frontend { +class GraphicsContext; +} class System; } // namespace Core @@ -99,7 +101,8 @@ public: ~ThreadManager(); /// Creates and starts the GPU thread. - void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher); /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 5ec99a126..1d85219b6 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -46,7 +46,8 @@ public: /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer /// specific implementation) - virtual void TryPresent(int timeout_ms) = 0; + /// Returns true if a frame was drawn + virtual bool TryPresent(int timeout_ms) = 0; // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 31add708f..346feeb2f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using texture_cache.GuardRenderTargets(true); View color_surface; if (using_color_fb) { - color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); + const std::size_t index = regs.clear_buffers.RT; + color_surface = texture_cache.GetColorBufferSurface(index, true); + texture_cache.MarkColorBufferInUse(index); } View depth_surface; if (using_depth_fb || using_stencil_fb) { - depth_surface = texture_cache.GetDepthBufferSurface(false); + depth_surface = texture_cache.GetDepthBufferSurface(true); + texture_cache.MarkDepthBufferInUse(); } texture_cache.GuardRenderTargets(false); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e3d31c3eb..046ee55a5 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -327,8 +327,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, std::size_t end) { - context->MakeCurrent(); - SCOPE_EXIT({ return context->DoneCurrent(); }); + const auto scope = context->Acquire(); for (std::size_t i = begin; i < end; ++i) { if (stop_loading) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c7d24cf14..160ae4340 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -31,11 +31,11 @@ namespace { using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; -using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -702,20 +702,19 @@ private: code.AddNewLine(); } - std::string GetInputFlags(AttributeUse attribute) { + const char* GetInputFlags(PixelImap attribute) { switch (attribute) { - case AttributeUse::Perspective: - // Default, Smooth - return {}; - case AttributeUse::Constant: - return "flat "; - case AttributeUse::ScreenLinear: - return "noperspective "; - default: - case AttributeUse::Unused: - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); - return {}; + case PixelImap::Perspective: + return "smooth"; + case PixelImap::Constant: + return "flat"; + case PixelImap::ScreenLinear: + return "noperspective"; + case PixelImap::Unused: + break; } + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); + return {}; } void DeclareInputAttributes() { @@ -749,8 +748,8 @@ private: std::string suffix; if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(location)}; - if (skip_unused && input_mode == AttributeUse::Unused) { + const auto input_mode{header.ps.GetPixelImap(location)}; + if (input_mode == PixelImap::Unused) { return; } suffix = GetInputFlags(input_mode); @@ -927,7 +926,7 @@ private: const u32 address{generic_base + index * generic_stride + element * element_stride}; const bool declared = stage != ShaderType::Fragment || - header.ps.GetAttributeUse(index) != AttributeUse::Unused; + header.ps.GetPixelImap(index) != PixelImap::Unused; const std::string value = declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; code.AddLine("case 0x{:X}U: return {};", address, value); @@ -1142,8 +1141,7 @@ private: GetSwizzle(element)), Type::Float}; case ShaderType::Fragment: - return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), - Type::Float}; + return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; default: UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index fca5e3ec0..f1a28cc21 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -30,8 +30,6 @@ namespace OpenGL { namespace { -// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have -// to wait on available presentation frames. constexpr std::size_t SWAP_CHAIN_SIZE = 3; struct Frame { @@ -214,7 +212,7 @@ public: std::deque<Frame*> present_queue; Frame* previous_frame{}; - FrameMailbox() : has_debug_tool{HasDebugTool()} { + FrameMailbox() { for (auto& frame : swap_chain) { free_queue.push(&frame); } @@ -285,13 +283,9 @@ public: std::unique_lock lock{swap_chain_lock}; present_queue.push_front(frame); present_cv.notify_one(); - - DebugNotifyNextFrame(); } Frame* TryGetPresentFrame(int timeout_ms) { - DebugWaitForNextFrame(); - std::unique_lock lock{swap_chain_lock}; // wait for new entries in the present_queue present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), @@ -317,38 +311,12 @@ public: previous_frame = frame; return frame; } - -private: - std::mutex debug_synch_mutex; - std::condition_variable debug_synch_condition; - std::atomic_int frame_for_debug{}; - const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step - - /// Signal that a new frame is available (called from GPU thread) - void DebugNotifyNextFrame() { - if (!has_debug_tool) { - return; - } - frame_for_debug++; - std::lock_guard lock{debug_synch_mutex}; - debug_synch_condition.notify_one(); - } - - /// Wait for a new frame to be available (called from presentation thread) - void DebugWaitForNextFrame() { - if (!has_debug_tool) { - return; - } - const int last_frame = frame_for_debug; - std::unique_lock lock{debug_synch_mutex}; - debug_synch_condition.wait(lock, - [this, last_frame] { return frame_for_debug > last_frame; }); - } }; -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, + Core::Frontend::GraphicsContext& context) : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, - frame_mailbox{std::make_unique<FrameMailbox>()} {} + frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; @@ -356,8 +324,6 @@ MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 12 MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - render_window.PollEvents(); - if (!framebuffer) { return; } @@ -413,6 +379,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { m_current_frame++; rasterizer->TickFrame(); } + + render_window.PollEvents(); + if (has_debug_tool) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + Present(0); + context.SwapBuffers(); + } } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { @@ -480,6 +453,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color } void RendererOpenGL::InitOpenGLObjects() { + frame_mailbox = std::make_unique<FrameMailbox>(); + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); @@ -692,12 +667,21 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } -void RendererOpenGL::TryPresent(int timeout_ms) { +bool RendererOpenGL::TryPresent(int timeout_ms) { + if (has_debug_tool) { + LOG_DEBUG(Render_OpenGL, + "Skipping presentation because we are presenting on the main context"); + return false; + } + return Present(timeout_ms); +} + +bool RendererOpenGL::Present(int timeout_ms) { const auto& layout = render_window.GetFramebufferLayout(); auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); if (!frame) { LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); - return; + return false; } // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a @@ -725,6 +709,7 @@ void RendererOpenGL::TryPresent(int timeout_ms) { glFlush(); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + return true; } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 33073ce5b..50b647661 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,13 +55,14 @@ class FrameMailbox; class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); + explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, + Core::Frontend::GraphicsContext& context); ~RendererOpenGL() override; bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void TryPresent(int timeout_ms) override; + bool TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -89,8 +90,11 @@ private: void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + bool Present(int timeout_ms); + Core::Frontend::EmuWindow& emu_window; Core::System& system; + Core::Frontend::GraphicsContext& context; StateTracker state_tracker{system}; @@ -115,6 +119,8 @@ private: /// Frame presentation mailbox std::unique_ptr<FrameMailbox> frame_mailbox; + + bool has_debug_tool = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 42bb01418..6953aaafe 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -141,8 +141,9 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); } -void RendererVulkan::TryPresent(int /*timeout_ms*/) { +bool RendererVulkan::TryPresent(int /*timeout_ms*/) { // TODO (bunnei): ImplementMe + return true; } bool RendererVulkan::Init() { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 3da08d2e4..d14384e79 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -42,7 +42,7 @@ public: bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void TryPresent(int timeout_ms) override; + bool TryPresent(int timeout_ms) override; private: std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d67f08cf9..b9f9e2714 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -35,7 +35,7 @@ namespace { using Sirit::Id; using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; -using Tegra::Shader::AttributeUse; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using namespace VideoCommon::Shader; @@ -752,16 +752,16 @@ private: if (stage != ShaderType::Fragment) { continue; } - switch (header.ps.GetAttributeUse(location)) { - case AttributeUse::Constant: + switch (header.ps.GetPixelImap(location)) { + case PixelImap::Constant: Decorate(id, spv::Decoration::Flat); break; - case AttributeUse::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - case AttributeUse::Perspective: + case PixelImap::Perspective: // Default break; + case PixelImap::ScreenLinear: + Decorate(id, spv::Decoration::NoPerspective); + break; default: UNREACHABLE_MSG("Unused attribute being fetched"); } @@ -1145,9 +1145,6 @@ private: switch (attribute) { case Attribute::Index::Position: { if (stage == ShaderType::Fragment) { - if (element == 3) { - return {Constant(t_float, 1.0f), Type::Float}; - } return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), Type::Float}; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 28a49addd..b8f63922f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -44,6 +44,9 @@ Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { return OperationCode::AtomicIXor; case AtomicOp::Exch: return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; } }(); return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 4944e9d69..e6edec459 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -11,12 +11,17 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; +using Tegra::Shader::IpaInterpMode; using Tegra::Shader::OpCode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::SystemVariable; +using Index = Tegra::Shader::Attribute::Index; + u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -213,27 +218,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } case OpCode::Id::IPA: { const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), - instr.ipa.sample_mode.Value()}; + const Index index = attribute.index; Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(attribute.index, attribute.element); - const Tegra::Shader::Attribute::Index index = attribute.index.Value(); - const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && - index <= Tegra::Shader::Attribute::Index::Attribute_31; - if (is_generic || is_physical) { - // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. - // In theory by setting them as perspective, OpenGL does the perspective correction. - // A way must figured to reverse the last step of it. - if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + : GetInputAttribute(index, attribute.element); + + // Code taken from Ryujinx. + if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { + const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); + if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { + Node position_w = GetInputAttribute(Index::Position, 3); + value = Operation(OperationCode::FMul, move(value), move(position_w)); } } - value = GetSaturatedFloat(value, instr.ipa.saturate); - SetRegister(bb, instr.gpr0, value); + if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); + } + + value = GetSaturatedFloat(move(value), instr.ipa.saturate); + + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::OUT_R: { diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index a5f81a8a0..f60bdc60a 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -15,13 +15,13 @@ #endif #include "video_core/video_core.h" -namespace VideoCore { - -std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system) { +namespace { +std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, + Core::System& system, + Core::Frontend::GraphicsContext& context) { switch (Settings::values.renderer_backend) { case Settings::RendererBackend::OpenGL: - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); + return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); #ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); @@ -30,13 +30,23 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind return nullptr; } } +} // Anonymous namespace -std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { - if (Settings::values.use_asynchronous_gpu_emulation) { - return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer()); +namespace VideoCore { + +std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + auto context = emu_window.CreateSharedContext(); + const auto scope = context->Acquire(); + auto renderer = CreateRenderer(emu_window, system, *context); + if (!renderer->Init()) { + return nullptr; } - return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer()); + if (Settings::values.use_asynchronous_gpu_emulation) { + return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), + std::move(context)); + } + return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context)); } u16 GetResolutionScaleFactor(const RendererBase& renderer) { diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index b8e0ac372..f5c27125d 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -22,17 +22,8 @@ namespace VideoCore { class RendererBase; -/** - * Creates a renderer instance. - * - * @note The returned renderer instance is simply allocated. Its Init() - * function still needs to be called to fully complete its setup. - */ -std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system); - /// Creates an emulated GPU instance using the given system context. -std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system); +std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); u16 GetResolutionScaleFactor(const RendererBase& renderer); |