diff options
-rw-r--r-- | src/core/core_timing.cpp | 10 | ||||
-rw-r--r-- | src/core/core_timing.h | 1 | ||||
-rw-r--r-- | src/core/hle/kernel/server_session.cpp | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/server_session.h | 7 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 2 | ||||
-rw-r--r-- | src/core/hle/service/sm/controller.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 44 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 159 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 101 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 25 |
11 files changed, 247 insertions, 114 deletions
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index f977d1b32..7953c8720 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -56,6 +56,9 @@ static u64 event_fifo_id; // to the event_queue by the emu thread static Common::MPSCQueue<Event, false> ts_queue; +// the queue for unscheduling the events from other threads threadsafe +static Common::MPSCQueue<std::pair<const EventType*, u64>, false> unschedule_queue; + constexpr int MAX_SLICE_LENGTH = 20000; static s64 idled_cycles; @@ -158,6 +161,10 @@ void UnscheduleEvent(const EventType* event_type, u64 userdata) { } } +void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata) { + unschedule_queue.Push(std::make_pair(event_type, userdata)); +} + void RemoveEvent(const EventType* event_type) { auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) { return e.type == event_type; }); @@ -194,6 +201,9 @@ void MoveEvents() { void Advance() { MoveEvents(); + for (std::pair<const EventType*, u64> ev; unschedule_queue.Pop(ev);) { + UnscheduleEvent(ev.first, ev.second); + } int cycles_executed = slice_length - downcount; global_timer += cycles_executed; diff --git a/src/core/core_timing.h b/src/core/core_timing.h index dfa161c0d..9ed757bd7 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -65,6 +65,7 @@ void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 user void ScheduleEventThreadsafe(s64 cycles_into_future, const EventType* event_type, u64 userdata); void UnscheduleEvent(const EventType* event_type, u64 userdata); +void UnscheduleEventThreadsafe(const EventType* event_type, u64 userdata); /// We only permit one event of each type in the queue at a time. void RemoveEvent(const EventType* event_type); diff --git a/src/core/hle/kernel/server_session.cpp b/src/core/hle/kernel/server_session.cpp index d09ca5992..51a1ec160 100644 --- a/src/core/hle/kernel/server_session.cpp +++ b/src/core/hle/kernel/server_session.cpp @@ -152,7 +152,7 @@ ResultCode ServerSession::HandleSyncRequest(SharedPtr<Thread> thread) { // Handle scenario when ConvertToDomain command was issued, as we must do the conversion at the // end of the command such that only commands following this one are handled as domains if (convert_to_domain) { - ASSERT_MSG(domain_request_handlers.empty(), "already a domain"); + ASSERT_MSG(IsSession(), "ServerSession is already a domain instance."); domain_request_handlers = {hle_handler}; convert_to_domain = false; } diff --git a/src/core/hle/kernel/server_session.h b/src/core/hle/kernel/server_session.h index 2bce54fee..1a88e66b9 100644 --- a/src/core/hle/kernel/server_session.h +++ b/src/core/hle/kernel/server_session.h @@ -97,7 +97,12 @@ public: /// Returns true if the session has been converted to a domain, otherwise False bool IsDomain() const { - return !domain_request_handlers.empty(); + return !IsSession(); + } + + /// Returns true if this session has not been converted to a domain, otherwise false. + bool IsSession() const { + return domain_request_handlers.empty(); } /// Converts the session to a domain at the end of the current command diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index a1a7867ce..cf4f94822 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -167,7 +167,7 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { } void Thread::CancelWakeupTimer() { - CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); + CoreTiming::UnscheduleEventThreadsafe(ThreadWakeupEventType, callback_handle); } static boost::optional<s32> GetNextProcessorId(u64 mask) { diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp index 518a0cc46..1cef73216 100644 --- a/src/core/hle/service/sm/controller.cpp +++ b/src/core/hle/service/sm/controller.cpp @@ -10,7 +10,7 @@ namespace Service::SM { void Controller::ConvertSessionToDomain(Kernel::HLERequestContext& ctx) { - ASSERT_MSG(!ctx.Session()->IsDomain(), "session is alread a domain"); + ASSERT_MSG(ctx.Session()->IsSession(), "Session is already a domain"); ctx.Session()->ConvertToDomain(); IPC::ResponseBuilder rb{ctx, 3}; @@ -41,7 +41,7 @@ void Controller::DuplicateSessionEx(Kernel::HLERequestContext& ctx) { void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u32>(0x500); + rb.Push<u16>(0x500); LOG_WARNING(Service, "(STUBBED) called"); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 52a649e2f..9d1549fe9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -648,11 +648,11 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers( if (used_buffer.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size * sizeof(float); + size = buffer.size; if (size > MaxConstbufferSize) { - LOG_ERROR(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); + LOG_CRITICAL(HW_GPU, "indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); size = MaxConstbufferSize; } } else { diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 5d58ebd4f..b6947b97b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -94,11 +94,11 @@ struct FormatTuple { static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S - {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 + {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, - false}, // A2B10G10R10 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8 + false}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U @@ -119,13 +119,14 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // BC7U {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8 + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // G8R8U + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // G8R8S {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16UNORM + {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I @@ -242,10 +243,10 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU // clang-format off MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, - MortonCopy<true, PixelFormat::B5G6R5>, - MortonCopy<true, PixelFormat::A2B10G10R10>, - MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, + MortonCopy<true, PixelFormat::B5G6R5U>, + MortonCopy<true, PixelFormat::A2B10G10R10U>, + MortonCopy<true, PixelFormat::A1B5G5R5U>, + MortonCopy<true, PixelFormat::R8U>, MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>, MortonCopy<true, PixelFormat::RGBA16U>, @@ -260,13 +261,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<true, PixelFormat::DXN2SNORM>, MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, + MortonCopy<true, PixelFormat::G8R8U>, + MortonCopy<true, PixelFormat::G8R8S>, MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, + MortonCopy<true, PixelFormat::R16U>, MortonCopy<true, PixelFormat::R16S>, MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, @@ -295,10 +297,10 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU // clang-format off MortonCopy<false, PixelFormat::ABGR8U>, MortonCopy<false, PixelFormat::ABGR8S>, - MortonCopy<false, PixelFormat::B5G6R5>, - MortonCopy<false, PixelFormat::A2B10G10R10>, - MortonCopy<false, PixelFormat::A1B5G5R5>, - MortonCopy<false, PixelFormat::R8>, + MortonCopy<false, PixelFormat::B5G6R5U>, + MortonCopy<false, PixelFormat::A2B10G10R10U>, + MortonCopy<false, PixelFormat::A1B5G5R5U>, + MortonCopy<false, PixelFormat::R8U>, MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, MortonCopy<false, PixelFormat::RGBA16U>, @@ -315,13 +317,14 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU nullptr, nullptr, nullptr, - MortonCopy<false, PixelFormat::G8R8>, + MortonCopy<false, PixelFormat::G8R8U>, + MortonCopy<false, PixelFormat::G8R8S>, MortonCopy<false, PixelFormat::BGRA8>, MortonCopy<false, PixelFormat::RGBA32F>, MortonCopy<false, PixelFormat::RG32F>, MortonCopy<false, PixelFormat::R32F>, MortonCopy<false, PixelFormat::R16F>, - MortonCopy<false, PixelFormat::R16UNORM>, + MortonCopy<false, PixelFormat::R16U>, MortonCopy<false, PixelFormat::R16S>, MortonCopy<false, PixelFormat::R16UI>, MortonCopy<false, PixelFormat::R16I>, @@ -461,7 +464,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { } static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) { - const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)}; + const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)}; for (size_t y = 0; y < height; ++y) { for (size_t x = 0; x < width; ++x) { const size_t offset{bpp * (y * width + x)}; @@ -493,7 +496,8 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma ConvertS8Z24ToZ24S8(data, width, height); break; - case PixelFormat::G8R8: + case PixelFormat::G8R8U: + case PixelFormat::G8R8S: // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8. ConvertG8R8ToR8G8(data, width, height); break; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0de87d8c2..55cf3782c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -25,10 +25,10 @@ struct SurfaceParams { enum class PixelFormat { ABGR8U = 0, ABGR8S = 1, - B5G6R5 = 2, - A2B10G10R10 = 3, - A1B5G5R5 = 4, - R8 = 5, + B5G6R5U = 2, + A2B10G10R10U = 3, + A1B5G5R5U = 4, + R8U = 5, R8UI = 6, RGBA16F = 7, RGBA16U = 8, @@ -43,36 +43,37 @@ struct SurfaceParams { DXN2SNORM = 17, BC7U = 18, ASTC_2D_4X4 = 19, - G8R8 = 20, - BGRA8 = 21, - RGBA32F = 22, - RG32F = 23, - R32F = 24, - R16F = 25, - R16UNORM = 26, - R16S = 27, - R16UI = 28, - R16I = 29, - RG16 = 30, - RG16F = 31, - RG16UI = 32, - RG16I = 33, - RG16S = 34, - RGB32F = 35, - SRGBA8 = 36, - RG8U = 37, - RG8S = 38, - RG32UI = 39, - R32UI = 40, + G8R8U = 20, + G8R8S = 21, + BGRA8 = 22, + RGBA32F = 23, + RG32F = 24, + R32F = 25, + R16F = 26, + R16U = 27, + R16S = 28, + R16UI = 29, + R16I = 30, + RG16 = 31, + RG16F = 32, + RG16UI = 33, + RG16I = 34, + RG16S = 35, + RGB32F = 36, + SRGBA8 = 37, + RG8U = 38, + RG8S = 39, + RG32UI = 40, + R32UI = 41, MaxColorFormat, // DepthStencil formats - Z24S8 = 41, - S8Z24 = 42, - Z32F = 43, - Z16 = 44, - Z32FS8 = 45, + Z24S8 = 42, + S8Z24 = 43, + Z32F = 44, + Z16 = 45, + Z32FS8 = 46, MaxDepthStencilFormat, @@ -112,10 +113,10 @@ struct SurfaceParams { constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ 1, // ABGR8U 1, // ABGR8S - 1, // B5G6R5 - 1, // A2B10G10R10 - 1, // A1B5G5R5 - 1, // R8 + 1, // B5G6R5U + 1, // A2B10G10R10U + 1, // A1B5G5R5U + 1, // R8U 1, // R8UI 1, // RGBA16F 1, // RGBA16U @@ -130,13 +131,14 @@ struct SurfaceParams { 4, // DXN2SNORM 4, // BC7U 4, // ASTC_2D_4X4 - 1, // G8R8 + 1, // G8R8U + 1, // G8R8S 1, // BGRA8 1, // RGBA32F 1, // RG32F 1, // R32F 1, // R16F - 1, // R16UNORM + 1, // R16U 1, // R16S 1, // R16UI 1, // R16I @@ -169,10 +171,10 @@ struct SurfaceParams { constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 32, // ABGR8U 32, // ABGR8S - 16, // B5G6R5 - 32, // A2B10G10R10 - 16, // A1B5G5R5 - 8, // R8 + 16, // B5G6R5U + 32, // A2B10G10R10U + 16, // A1B5G5R5U + 8, // R8U 8, // R8UI 64, // RGBA16F 64, // RGBA16U @@ -187,13 +189,14 @@ struct SurfaceParams { 128, // DXN2SNORM 128, // BC7U 32, // ASTC_2D_4X4 - 16, // G8R8 + 16, // G8R8U + 16, // G8R8S 32, // BGRA8 128, // RGBA32F 64, // RG32F 32, // R32F 16, // R16F - 16, // R16UNORM + 16, // R16U 16, // R16S 16, // R16UI 16, // R16I @@ -253,7 +256,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: - return PixelFormat::A2B10G10R10; + return PixelFormat::A2B10G10R10U; case Tegra::RenderTargetFormat::RGBA16_FLOAT: return PixelFormat::RGBA16F; case Tegra::RenderTargetFormat::RGBA16_UNORM: @@ -267,11 +270,11 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R11G11B10_FLOAT: return PixelFormat::R11FG11FB10F; case Tegra::RenderTargetFormat::B5G6R5_UNORM: - return PixelFormat::B5G6R5; + return PixelFormat::B5G6R5U; case Tegra::RenderTargetFormat::RGBA32_UINT: return PixelFormat::RGBA32UI; case Tegra::RenderTargetFormat::R8_UNORM: - return PixelFormat::R8; + return PixelFormat::R8U; case Tegra::RenderTargetFormat::R8_UINT: return PixelFormat::R8UI; case Tegra::RenderTargetFormat::RG16_FLOAT: @@ -291,7 +294,7 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R16_FLOAT: return PixelFormat::R16F; case Tegra::RenderTargetFormat::R16_UNORM: - return PixelFormat::R16UNORM; + return PixelFormat::R16U; case Tegra::RenderTargetFormat::R16_SNORM: return PixelFormat::R16S; case Tegra::RenderTargetFormat::R16_UINT: @@ -325,15 +328,33 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::B5G6R5: - return PixelFormat::B5G6R5; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::B5G6R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::A2B10G10R10: - return PixelFormat::A2B10G10R10; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A2B10G10R10U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::A1B5G5R5: - return PixelFormat::A1B5G5R5; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::A1B5G5R5U; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R8: switch (component_type) { case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R8; + return PixelFormat::R8U; case Tegra::Texture::ComponentType::UINT: return PixelFormat::R8UI; } @@ -341,11 +362,33 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::G8R8: - return PixelFormat::G8R8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::G8R8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::G8R8S; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R16_G16_B16_A16: - return PixelFormat::RGBA16F; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::RGBA16U; + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGBA16F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::BF10GF11RF11: - return PixelFormat::R11FG11FB10F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::R11FG11FB10F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32_A32: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: @@ -367,13 +410,19 @@ struct SurfaceParams { static_cast<u32>(component_type)); UNREACHABLE(); case Tegra::Texture::TextureFormat::R32_G32_B32: - return PixelFormat::RGB32F; + switch (component_type) { + case Tegra::Texture::ComponentType::FLOAT: + return PixelFormat::RGB32F; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::R16: switch (component_type) { case Tegra::Texture::ComponentType::FLOAT: return PixelFormat::R16F; case Tegra::Texture::ComponentType::UNORM: - return PixelFormat::R16UNORM; + return PixelFormat::R16U; case Tegra::Texture::ComponentType::SNORM: return PixelFormat::R16S; case Tegra::Texture::ComponentType::UINT: diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6834d7085..e0dfdbb9f 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -367,31 +367,32 @@ public: } /// Generates code representing a uniform (C buffer) register, interpreted as the input type. - std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { + std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type, + Register::Size size = Register::Size::Word) { declr_const_buffers[index].MarkAsUsed(index, offset, stage); std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + std::to_string(offset % 4) + ']'; if (type == GLSLRegister::Type::Float) { - return value; + // Do nothing, default } else if (type == GLSLRegister::Type::Integer) { - return "floatBitsToInt(" + value + ')'; + value = "floatBitsToInt(" + value + ')'; } else if (type == GLSLRegister::Type::UnsignedInteger) { - return "floatBitsToUint(" + value + ')'; + value = "floatBitsToUint(" + value + ')'; } else { UNREACHABLE(); } + + return ConvertIntegerSize(value, size); } - std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, + std::string GetUniformIndirect(u64 cbuf_index, s64 offset, const std::string& index_str, GLSLRegister::Type type) { - declr_const_buffers[index].MarkAsUsedIndirect(index, stage); - - std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " + - std::to_string(offset) + ") / 4)"; + declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage); - std::string value = - 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]"; + std::string final_offset = fmt::format("({} + {})", index_str, offset / 4); + std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" + + final_offset + " % 4]"; if (type == GLSLRegister::Type::Float) { return value; @@ -1249,20 +1250,41 @@ private: op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1, 1, instr.alu.saturate_d, 0, instr.conversion.dest_size); break; } - case OpCode::Id::I2F_R: { + case OpCode::Id::I2F_R: + case OpCode::Id::I2F_C: { ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented"); ASSERT_MSG(!instr.conversion.selector, "Unimplemented"); - std::string op_a = regs.GetRegisterAsInteger( - instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size); + + std::string op_a{}; + + if (instr.is_b_gpr) { + op_a = + regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_input_signed, + instr.conversion.src_size); + } else { + op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + instr.conversion.is_input_signed + ? GLSLRegister::Type::Integer + : GLSLRegister::Type::UnsignedInteger, + instr.conversion.src_size); + } if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; } @@ -1271,6 +1293,14 @@ private: ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + if (instr.conversion.abs_a) { + op_a = "abs(" + op_a + ')'; + } + + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + switch (instr.conversion.f2f.rounding) { case Tegra::Shader::F2fRoundingOp::None: break; @@ -1293,21 +1323,29 @@ private: break; } - if (instr.conversion.abs_a) { - op_a = "abs(" + op_a + ')'; - } - regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d); break; } - case OpCode::Id::F2I_R: { + case OpCode::Id::F2I_R: + case OpCode::Id::F2I_C: { ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented"); - std::string op_a = regs.GetRegisterAsFloat(instr.gpr20); + std::string op_a{}; + + if (instr.is_b_gpr) { + op_a = regs.GetRegisterAsFloat(instr.gpr20); + } else { + op_a = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::Float); + } if (instr.conversion.abs_a) { op_a = "abs(" + op_a + ')'; } + if (instr.conversion.negate_a) { + op_a = "-(" + op_a + ')'; + } + switch (instr.conversion.f2i.rounding) { case Tegra::Shader::F2iRoundingOp::None: break; @@ -1355,11 +1393,16 @@ private: case OpCode::Id::LD_C: { ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented"); + // Add an extra scope and declare the index register inside to prevent + // overwriting it in case it is used as an output of the LD instruction. + shader.AddLine("{"); + ++shader.scope; + + shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + + " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);"); + std::string op_a = - regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8, - GLSLRegister::Type::Float); - std::string op_b = - regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8, + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index", GLSLRegister::Type::Float); switch (instr.ld_c.type.Value()) { @@ -1367,16 +1410,22 @@ private: regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); break; - case Tegra::Shader::UniformType::Double: + case Tegra::Shader::UniformType::Double: { + std::string op_b = + regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, + "index", GLSLRegister::Type::Float); regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1); break; - + } default: LOG_CRITICAL(HW_GPU, "Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); UNREACHABLE(); } + + --shader.scope; + shader.AddLine("}"); break; } case OpCode::Id::ST_A: { diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 83ea0cfc0..8f719fdd8 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,16 +24,25 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; inline GLenum VertexType(Maxwell::VertexAttribute attrib) { switch (attrib.type) { + case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: { switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_UNSIGNED_SHORT; + case Maxwell::VertexAttribute::Size::Size_32: + case Maxwell::VertexAttribute::Size::Size_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; } @@ -43,16 +52,25 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } + case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: { switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return GL_INT; + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return GL_SHORT; + case Maxwell::VertexAttribute::Size::Size_32: + case Maxwell::VertexAttribute::Size::Size_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32: + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; } @@ -62,9 +80,6 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return {}; } - case Maxwell::VertexAttribute::Type::UnsignedInt: - return GL_UNSIGNED_INT; - case Maxwell::VertexAttribute::Type::Float: return GL_FLOAT; } |