diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
3 files changed, 120 insertions, 72 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 15a33ed9b..d055b1dfa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -92,7 +92,8 @@ struct FormatTuple { } static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U + {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5 {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, false}, // A2B10G10R10 @@ -132,6 +133,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8 + {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S // DepthStencil formats {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, @@ -231,31 +233,33 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>, - MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, - MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, - MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, - MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, - MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>, - MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>, - MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>, - MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>, - MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>, - MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>, - MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>, - MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>, - MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>, - MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>, - MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::Z24S8>, - MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, - MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>, + MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>, + MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>, + MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>, + MortonCopy<true, PixelFormat::RGBA16F>, MortonCopy<true, PixelFormat::R11FG11FB10F>, + MortonCopy<true, PixelFormat::RGBA32UI>, MortonCopy<true, PixelFormat::DXT1>, + MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::DXN2UNORM>, + MortonCopy<true, PixelFormat::DXN2SNORM>, MortonCopy<true, PixelFormat::BC7U>, + MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::G8R8>, + MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>, + MortonCopy<true, PixelFormat::RG32F>, MortonCopy<true, PixelFormat::R32F>, + MortonCopy<true, PixelFormat::R16F>, MortonCopy<true, PixelFormat::R16UNORM>, + MortonCopy<true, PixelFormat::R16S>, MortonCopy<true, PixelFormat::R16UI>, + MortonCopy<true, PixelFormat::R16I>, MortonCopy<true, PixelFormat::RG16>, + MortonCopy<true, PixelFormat::RG16F>, MortonCopy<true, PixelFormat::RG16UI>, + MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>, + MortonCopy<true, PixelFormat::RGB32F>, MortonCopy<true, PixelFormat::SRGBA8>, + MortonCopy<true, PixelFormat::RG8S>, MortonCopy<true, PixelFormat::Z24S8>, + MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>, + MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>, }; static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr), SurfaceParams::MaxPixelFormat> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::ABGR8>, + MortonCopy<false, PixelFormat::ABGR8U>, + MortonCopy<false, PixelFormat::ABGR8S>, MortonCopy<false, PixelFormat::B5G6R5>, MortonCopy<false, PixelFormat::A2B10G10R10>, MortonCopy<false, PixelFormat::A1B5G5R5>, @@ -290,6 +294,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU MortonCopy<false, PixelFormat::RG16S>, MortonCopy<false, PixelFormat::RGB32F>, MortonCopy<false, PixelFormat::SRGBA8>, + MortonCopy<false, PixelFormat::RG8S>, MortonCopy<false, PixelFormat::Z24S8>, MortonCopy<false, PixelFormat::S8Z24>, MortonCopy<false, PixelFormat::Z32F>, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e24ba8cfe..d7a43652e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -23,48 +23,50 @@ using PageMap = boost::icl::interval_map<u64, int>; struct SurfaceParams { enum class PixelFormat { - ABGR8 = 0, - B5G6R5 = 1, - A2B10G10R10 = 2, - A1B5G5R5 = 3, - R8 = 4, - RGBA16F = 5, - R11FG11FB10F = 6, - RGBA32UI = 7, - DXT1 = 8, - DXT23 = 9, - DXT45 = 10, - DXN1 = 11, // This is also known as BC4 - DXN2UNORM = 12, - DXN2SNORM = 13, - BC7U = 14, - ASTC_2D_4X4 = 15, - G8R8 = 16, - BGRA8 = 17, - RGBA32F = 18, - RG32F = 19, - R32F = 20, - R16F = 21, - R16UNORM = 22, - R16S = 23, - R16UI = 24, - R16I = 25, - RG16 = 26, - RG16F = 27, - RG16UI = 28, - RG16I = 29, - RG16S = 30, - RGB32F = 31, - SRGBA8 = 32, + ABGR8U = 0, + ABGR8S = 1, + B5G6R5 = 2, + A2B10G10R10 = 3, + A1B5G5R5 = 4, + R8 = 5, + RGBA16F = 6, + R11FG11FB10F = 7, + RGBA32UI = 8, + DXT1 = 9, + DXT23 = 10, + DXT45 = 11, + DXN1 = 12, // This is also known as BC4 + DXN2UNORM = 13, + DXN2SNORM = 14, + BC7U = 15, + ASTC_2D_4X4 = 16, + G8R8 = 17, + BGRA8 = 18, + RGBA32F = 19, + RG32F = 20, + R32F = 21, + R16F = 22, + R16UNORM = 23, + R16S = 24, + R16UI = 25, + R16I = 26, + RG16 = 27, + RG16F = 28, + RG16UI = 29, + RG16I = 30, + RG16S = 31, + RGB32F = 32, + SRGBA8 = 33, + RG8S = 34, MaxColorFormat, // DepthStencil formats - Z24S8 = 33, - S8Z24 = 34, - Z32F = 35, - Z16 = 36, - Z32FS8 = 37, + Z24S8 = 35, + S8Z24 = 36, + Z32F = 37, + Z16 = 38, + Z32FS8 = 39, MaxDepthStencilFormat, @@ -102,7 +104,8 @@ struct SurfaceParams { return 0; constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{ - 1, // ABGR8 + 1, // ABGR8U + 1, // ABGR8S 1, // B5G6R5 1, // A2B10G10R10 1, // A1B5G5R5 @@ -135,6 +138,7 @@ struct SurfaceParams { 1, // RG16S 1, // RGB32F 1, // SRGBA8 + 1, // RG8S 1, // Z24S8 1, // S8Z24 1, // Z32F @@ -151,7 +155,8 @@ struct SurfaceParams { return 0; constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ - 32, // ABGR8 + 32, // ABGR8U + 32, // ABGR8S 16, // B5G6R5 32, // A2B10G10R10 16, // A1B5G5R5 @@ -184,6 +189,7 @@ struct SurfaceParams { 32, // RG16S 96, // RGB32F 32, // SRGBA8 + 16, // RG8S 32, // Z24S8 32, // S8Z24 32, // Z32F @@ -223,7 +229,9 @@ struct SurfaceParams { // gamma. case Tegra::RenderTargetFormat::RGBA8_SRGB: case Tegra::RenderTargetFormat::RGBA8_UNORM: - return PixelFormat::ABGR8; + return PixelFormat::ABGR8U; + case Tegra::RenderTargetFormat::RGBA8_SNORM: + return PixelFormat::ABGR8S; case Tegra::RenderTargetFormat::BGRA8_UNORM: return PixelFormat::BGRA8; case Tegra::RenderTargetFormat::RGB10_A2_UNORM: @@ -252,6 +260,8 @@ struct SurfaceParams { return PixelFormat::RG16; case Tegra::RenderTargetFormat::RG16_SNORM: return PixelFormat::RG16S; + case Tegra::RenderTargetFormat::RG8_SNORM: + return PixelFormat::RG8S; case Tegra::RenderTargetFormat::R16_FLOAT: return PixelFormat::R16F; case Tegra::RenderTargetFormat::R16_UNORM: @@ -275,7 +285,15 @@ struct SurfaceParams { // TODO(Subv): Properly implement this switch (format) { case Tegra::Texture::TextureFormat::A8R8G8B8: - return PixelFormat::ABGR8; + switch (component_type) { + case Tegra::Texture::ComponentType::UNORM: + return PixelFormat::ABGR8U; + case Tegra::Texture::ComponentType::SNORM: + return PixelFormat::ABGR8S; + } + LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}", + static_cast<u32>(component_type)); + UNREACHABLE(); case Tegra::Texture::TextureFormat::B5G6R5: return PixelFormat::B5G6R5; case Tegra::Texture::TextureFormat::A2B10G10R10: @@ -402,8 +420,10 @@ struct SurfaceParams { case Tegra::RenderTargetFormat::R16_UNORM: case Tegra::RenderTargetFormat::B5G6R5_UNORM: return ComponentType::UNorm; + case Tegra::RenderTargetFormat::RGBA8_SNORM: case Tegra::RenderTargetFormat::RG16_SNORM: case Tegra::RenderTargetFormat::R16_SNORM: + case Tegra::RenderTargetFormat::RG8_SNORM: return ComponentType::SNorm; case Tegra::RenderTargetFormat::RGBA16_FLOAT: case Tegra::RenderTargetFormat::R11G11B10_FLOAT: @@ -429,7 +449,7 @@ struct SurfaceParams { static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: - return PixelFormat::ABGR8; + return PixelFormat::ABGR8U; default: LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 06bfe799c..85297bd00 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -141,6 +141,15 @@ private: ExitMethod jmp = Scan(target, end, labels); return exit_method = ParallelExit(no_jmp, jmp); } + case OpCode::Id::SSY: { + // The SSY instruction uses a similar encoding as the BRA instruction. + ASSERT_MSG(instr.bra.constant_buffer == 0, + "Constant buffer SSY is not supported"); + u32 target = offset + instr.bra.GetBranchTarget(); + labels.insert(target); + // Continue scanning for an exit method. + break; + } } } } @@ -830,7 +839,11 @@ private: ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute, "NeverExecute predicate not implemented"); - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + // Some instructions (like SSY) don't have a predicate field, they are always + // unconditionally executed. + bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId()); + + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { shader.AddLine("if (" + GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) + ')'); @@ -1670,16 +1683,25 @@ private: break; } case OpCode::Id::SSY: { - // The SSY opcode tells the GPU where to re-converge divergent execution paths, we - // can ignore this when generating GLSL code. + // The SSY opcode tells the GPU where to re-converge divergent execution paths, it + // sets the target of the jump that the SYNC instruction will make. The SSY opcode + // has a similar structure to the BRA opcode. + ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported"); + + u32 target = offset + instr.bra.GetBranchTarget(); + shader.AddLine("ssy_target = " + std::to_string(target) + "u;"); break; } - case OpCode::Id::SYNC: + case OpCode::Id::SYNC: { + // The SYNC opcode jumps to the address previously set by the SSY opcode ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always); + shader.AddLine("{ jmp_to = ssy_target; break; }"); + break; + } case OpCode::Id::DEPBAR: { - // TODO(Subv): Find out if we actually have to care about these instructions or if + // TODO(Subv): Find out if we actually have to care about this instruction or if // the GLSL compiler takes care of that for us. - LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed"); + LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); break; } default: { @@ -1693,7 +1715,7 @@ private: } // Close the predicate condition scope. - if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { + if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) { --shader.scope; shader.AddLine('}'); } @@ -1744,6 +1766,7 @@ private: } else { labels.insert(subroutine.begin); shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;"); + shader.AddLine("uint ssy_target = 0u;"); shader.AddLine("while (true) {"); ++shader.scope; @@ -1759,7 +1782,7 @@ private: u32 compile_end = CompileRange(label, next_label); if (compile_end > next_label && compile_end != PROGRAM_END) { // This happens only when there is a label inside a IF/LOOP block - shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }"); + shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }"); labels.emplace(compile_end); } |