summary | refs | log | tree | commit | diff | stats
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 47
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 106
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 39
3 files changed, 120 insertions, 72 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 15a33ed9b..d055b1dfa 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -92,7 +92,8 @@ struct FormatTuple {
}
static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
- {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
+ {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
{GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
false}, // A2B10G10R10
@@ -132,6 +133,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
{GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // SRGBA8
+ {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
// DepthStencil formats
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
@@ -231,31 +233,33 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
- MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
- MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>,
- MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
- MortonCopy<true, PixelFormat::DXN2UNORM>, MortonCopy<true, PixelFormat::DXN2SNORM>,
- MortonCopy<true, PixelFormat::BC7U>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
- MortonCopy<true, PixelFormat::G8R8>, MortonCopy<true, PixelFormat::BGRA8>,
- MortonCopy<true, PixelFormat::RGBA32F>, MortonCopy<true, PixelFormat::RG32F>,
- MortonCopy<true, PixelFormat::R32F>, MortonCopy<true, PixelFormat::R16F>,
- MortonCopy<true, PixelFormat::R16UNORM>, MortonCopy<true, PixelFormat::R16S>,
- MortonCopy<true, PixelFormat::R16UI>, MortonCopy<true, PixelFormat::R16I>,
- MortonCopy<true, PixelFormat::RG16>, MortonCopy<true, PixelFormat::RG16F>,
- MortonCopy<true, PixelFormat::RG16UI>, MortonCopy<true, PixelFormat::RG16I>,
- MortonCopy<true, PixelFormat::RG16S>, MortonCopy<true, PixelFormat::RGB32F>,
- MortonCopy<true, PixelFormat::SRGBA8>, MortonCopy<true, PixelFormat::Z24S8>,
- MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>,
- MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>,
+ MortonCopy<true, PixelFormat::ABGR8U>, MortonCopy<true, PixelFormat::ABGR8S>,
+ MortonCopy<true, PixelFormat::B5G6R5>, MortonCopy<true, PixelFormat::A2B10G10R10>,
+ MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>,
+ MortonCopy<true, PixelFormat::RGBA16F>, MortonCopy<true, PixelFormat::R11FG11FB10F>,
+ MortonCopy<true, PixelFormat::RGBA32UI>, MortonCopy<true, PixelFormat::DXT1>,
+ MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::DXN2UNORM>,
+ MortonCopy<true, PixelFormat::DXN2SNORM>, MortonCopy<true, PixelFormat::BC7U>,
+ MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::G8R8>,
+ MortonCopy<true, PixelFormat::BGRA8>, MortonCopy<true, PixelFormat::RGBA32F>,
+ MortonCopy<true, PixelFormat::RG32F>, MortonCopy<true, PixelFormat::R32F>,
+ MortonCopy<true, PixelFormat::R16F>, MortonCopy<true, PixelFormat::R16UNORM>,
+ MortonCopy<true, PixelFormat::R16S>, MortonCopy<true, PixelFormat::R16UI>,
+ MortonCopy<true, PixelFormat::R16I>, MortonCopy<true, PixelFormat::RG16>,
+ MortonCopy<true, PixelFormat::RG16F>, MortonCopy<true, PixelFormat::RG16UI>,
+ MortonCopy<true, PixelFormat::RG16I>, MortonCopy<true, PixelFormat::RG16S>,
+ MortonCopy<true, PixelFormat::RGB32F>, MortonCopy<true, PixelFormat::SRGBA8>,
+ MortonCopy<true, PixelFormat::RG8S>, MortonCopy<true, PixelFormat::Z24S8>,
+ MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>,
+ MortonCopy<true, PixelFormat::Z16>, MortonCopy<true, PixelFormat::Z32FS8>,
};
static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
gl_to_morton_fns = {
- MortonCopy<false, PixelFormat::ABGR8>,
+ MortonCopy<false, PixelFormat::ABGR8U>,
+ MortonCopy<false, PixelFormat::ABGR8S>,
MortonCopy<false, PixelFormat::B5G6R5>,
MortonCopy<false, PixelFormat::A2B10G10R10>,
MortonCopy<false, PixelFormat::A1B5G5R5>,
@@ -290,6 +294,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, Tegra::GPU
MortonCopy<false, PixelFormat::RG16S>,
MortonCopy<false, PixelFormat::RGB32F>,
MortonCopy<false, PixelFormat::SRGBA8>,
+ MortonCopy<false, PixelFormat::RG8S>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
MortonCopy<false, PixelFormat::Z32F>,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index e24ba8cfe..d7a43652e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -23,48 +23,50 @@ using PageMap = boost::icl::interval_map<u64, int>;
struct SurfaceParams {
enum class PixelFormat {
- ABGR8 = 0,
- B5G6R5 = 1,
- A2B10G10R10 = 2,
- A1B5G5R5 = 3,
- R8 = 4,
- RGBA16F = 5,
- R11FG11FB10F = 6,
- RGBA32UI = 7,
- DXT1 = 8,
- DXT23 = 9,
- DXT45 = 10,
- DXN1 = 11, // This is also known as BC4
- DXN2UNORM = 12,
- DXN2SNORM = 13,
- BC7U = 14,
- ASTC_2D_4X4 = 15,
- G8R8 = 16,
- BGRA8 = 17,
- RGBA32F = 18,
- RG32F = 19,
- R32F = 20,
- R16F = 21,
- R16UNORM = 22,
- R16S = 23,
- R16UI = 24,
- R16I = 25,
- RG16 = 26,
- RG16F = 27,
- RG16UI = 28,
- RG16I = 29,
- RG16S = 30,
- RGB32F = 31,
- SRGBA8 = 32,
+ ABGR8U = 0,
+ ABGR8S = 1,
+ B5G6R5 = 2,
+ A2B10G10R10 = 3,
+ A1B5G5R5 = 4,
+ R8 = 5,
+ RGBA16F = 6,
+ R11FG11FB10F = 7,
+ RGBA32UI = 8,
+ DXT1 = 9,
+ DXT23 = 10,
+ DXT45 = 11,
+ DXN1 = 12, // This is also known as BC4
+ DXN2UNORM = 13,
+ DXN2SNORM = 14,
+ BC7U = 15,
+ ASTC_2D_4X4 = 16,
+ G8R8 = 17,
+ BGRA8 = 18,
+ RGBA32F = 19,
+ RG32F = 20,
+ R32F = 21,
+ R16F = 22,
+ R16UNORM = 23,
+ R16S = 24,
+ R16UI = 25,
+ R16I = 26,
+ RG16 = 27,
+ RG16F = 28,
+ RG16UI = 29,
+ RG16I = 30,
+ RG16S = 31,
+ RGB32F = 32,
+ SRGBA8 = 33,
+ RG8S = 34,
MaxColorFormat,
// DepthStencil formats
- Z24S8 = 33,
- S8Z24 = 34,
- Z32F = 35,
- Z16 = 36,
- Z32FS8 = 37,
+ Z24S8 = 35,
+ S8Z24 = 36,
+ Z32F = 37,
+ Z16 = 38,
+ Z32FS8 = 39,
MaxDepthStencilFormat,
@@ -102,7 +104,8 @@ struct SurfaceParams {
return 0;
constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
- 1, // ABGR8
+ 1, // ABGR8U
+ 1, // ABGR8S
1, // B5G6R5
1, // A2B10G10R10
1, // A1B5G5R5
@@ -135,6 +138,7 @@ struct SurfaceParams {
1, // RG16S
1, // RGB32F
1, // SRGBA8
+ 1, // RG8S
1, // Z24S8
1, // S8Z24
1, // Z32F
@@ -151,7 +155,8 @@ struct SurfaceParams {
return 0;
constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
- 32, // ABGR8
+ 32, // ABGR8U
+ 32, // ABGR8S
16, // B5G6R5
32, // A2B10G10R10
16, // A1B5G5R5
@@ -184,6 +189,7 @@ struct SurfaceParams {
32, // RG16S
96, // RGB32F
32, // SRGBA8
+ 16, // RG8S
32, // Z24S8
32, // S8Z24
32, // Z32F
@@ -223,7 +229,9 @@ struct SurfaceParams {
// gamma.
case Tegra::RenderTargetFormat::RGBA8_SRGB:
case Tegra::RenderTargetFormat::RGBA8_UNORM:
- return PixelFormat::ABGR8;
+ return PixelFormat::ABGR8U;
+ case Tegra::RenderTargetFormat::RGBA8_SNORM:
+ return PixelFormat::ABGR8S;
case Tegra::RenderTargetFormat::BGRA8_UNORM:
return PixelFormat::BGRA8;
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
@@ -252,6 +260,8 @@ struct SurfaceParams {
return PixelFormat::RG16;
case Tegra::RenderTargetFormat::RG16_SNORM:
return PixelFormat::RG16S;
+ case Tegra::RenderTargetFormat::RG8_SNORM:
+ return PixelFormat::RG8S;
case Tegra::RenderTargetFormat::R16_FLOAT:
return PixelFormat::R16F;
case Tegra::RenderTargetFormat::R16_UNORM:
@@ -275,7 +285,15 @@ struct SurfaceParams {
// TODO(Subv): Properly implement this
switch (format) {
case Tegra::Texture::TextureFormat::A8R8G8B8:
- return PixelFormat::ABGR8;
+ switch (component_type) {
+ case Tegra::Texture::ComponentType::UNORM:
+ return PixelFormat::ABGR8U;
+ case Tegra::Texture::ComponentType::SNORM:
+ return PixelFormat::ABGR8S;
+ }
+ LOG_CRITICAL(HW_GPU, "Unimplemented component_type={}",
+ static_cast<u32>(component_type));
+ UNREACHABLE();
case Tegra::Texture::TextureFormat::B5G6R5:
return PixelFormat::B5G6R5;
case Tegra::Texture::TextureFormat::A2B10G10R10:
@@ -402,8 +420,10 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::R16_UNORM:
case Tegra::RenderTargetFormat::B5G6R5_UNORM:
return ComponentType::UNorm;
+ case Tegra::RenderTargetFormat::RGBA8_SNORM:
case Tegra::RenderTargetFormat::RG16_SNORM:
case Tegra::RenderTargetFormat::R16_SNORM:
+ case Tegra::RenderTargetFormat::RG8_SNORM:
return ComponentType::SNorm;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
@@ -429,7 +449,7 @@ struct SurfaceParams {
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
- return PixelFormat::ABGR8;
+ return PixelFormat::ABGR8U;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 06bfe799c..85297bd00 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -141,6 +141,15 @@ private:
ExitMethod jmp = Scan(target, end, labels);
return exit_method = ParallelExit(no_jmp, jmp);
}
+ case OpCode::Id::SSY: {
+ // The SSY instruction uses a similar encoding as the BRA instruction.
+ ASSERT_MSG(instr.bra.constant_buffer == 0,
+ "Constant buffer SSY is not supported");
+ u32 target = offset + instr.bra.GetBranchTarget();
+ labels.insert(target);
+ // Continue scanning for an exit method.
+ break;
+ }
}
}
}
@@ -830,7 +839,11 @@ private:
ASSERT_MSG(instr.pred.full_pred != Pred::NeverExecute,
"NeverExecute predicate not implemented");
- if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ // Some instructions (like SSY) don't have a predicate field, they are always
+ // unconditionally executed.
+ bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->GetId());
+
+ if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
shader.AddLine("if (" +
GetPredicateCondition(instr.pred.pred_index, instr.negate_pred != 0) +
')');
@@ -1670,16 +1683,25 @@ private:
break;
}
case OpCode::Id::SSY: {
- // The SSY opcode tells the GPU where to re-converge divergent execution paths, we
- // can ignore this when generating GLSL code.
+ // The SSY opcode tells the GPU where to re-converge divergent execution paths, it
+ // sets the target of the jump that the SYNC instruction will make. The SSY opcode
+ // has a similar structure to the BRA opcode.
+ ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
+
+ u32 target = offset + instr.bra.GetBranchTarget();
+ shader.AddLine("ssy_target = " + std::to_string(target) + "u;");
break;
}
- case OpCode::Id::SYNC:
+ case OpCode::Id::SYNC: {
+ // The SYNC opcode jumps to the address previously set by the SSY opcode
ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+ shader.AddLine("{ jmp_to = ssy_target; break; }");
+ break;
+ }
case OpCode::Id::DEPBAR: {
- // TODO(Subv): Find out if we actually have to care about these instructions or if
+ // TODO(Subv): Find out if we actually have to care about this instruction or if
// the GLSL compiler takes care of that for us.
- LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed");
+ LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
default: {
@@ -1693,7 +1715,7 @@ private:
}
// Close the predicate condition scope.
- if (instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
+ if (can_be_predicated && instr.pred.pred_index != static_cast<u64>(Pred::UnusedIndex)) {
--shader.scope;
shader.AddLine('}');
}
@@ -1744,6 +1766,7 @@ private:
} else {
labels.insert(subroutine.begin);
shader.AddLine("uint jmp_to = " + std::to_string(subroutine.begin) + "u;");
+ shader.AddLine("uint ssy_target = 0u;");
shader.AddLine("while (true) {");
++shader.scope;
@@ -1759,7 +1782,7 @@ private:
u32 compile_end = CompileRange(label, next_label);
if (compile_end > next_label && compile_end != PROGRAM_END) {
// This happens only when there is a label inside a IF/LOOP block
- shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
+ shader.AddLine("{ jmp_to = " + std::to_string(compile_end) + "u; break; }");
labels.emplace(compile_end);
}