summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h20
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp127
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h7
-rw-r--r--src/video_core/shader/decode/shift.cpp113
4 files changed, 150 insertions, 117 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 81b6d9eff..402869fde 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -624,6 +624,19 @@ enum class ShuffleOperation : u64 {
Bfly = 3, // shuffleXorNV
};
+enum class ShfType : u64 {
+ Bits32 = 0,
+ U64 = 2,
+ S64 = 3,
+};
+
+enum class ShfXmode : u64 {
+ None = 0,
+ HI = 1,
+ X = 2,
+ XHI = 3,
+};
+
union Instruction {
constexpr Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -776,6 +789,13 @@ union Instruction {
} shr;
union {
+ BitField<37, 2, ShfType> type;
+ BitField<48, 2, ShfXmode> xmode;
+ BitField<50, 1, u64> wrap;
+ BitField<20, 6, u64> immediate;
+ } shf;
+
+ union {
BitField<39, 5, u64> shift_amount;
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_a;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 362942e09..46a7433ea 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -248,9 +248,6 @@ void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
}
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
- if (accelerate_draw != AccelDraw::Indexed) {
- return 0;
- }
MICROPROFILE_SCOPE(OpenGL_Index);
const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize();
@@ -546,7 +543,8 @@ void RasterizerOpenGL::Clear() {
}
}
-void RasterizerOpenGL::DrawPrelude() {
+void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
SyncRasterizeEnable(state);
@@ -567,9 +565,6 @@ void RasterizerOpenGL::DrawPrelude() {
buffer_cache.Acquire();
- // Draw the vertex batch
- const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-
std::size_t buffer_size = CalculateVertexArraysSize();
// Add space for index buffer
@@ -596,7 +591,11 @@ void RasterizerOpenGL::DrawPrelude() {
// Upload vertex and index data.
SetupVertexBuffer(vao);
SetupVertexInstances(vao);
- index_buffer_offset = SetupIndexBuffer();
+
+ GLintptr index_buffer_offset;
+ if (is_indexed) {
+ index_buffer_offset = SetupIndexBuffer();
+ }
// Prepare packed bindings.
bind_ubo_pushbuffer.Setup();
@@ -630,6 +629,7 @@ void RasterizerOpenGL::DrawPrelude() {
// As all cached buffers are invalidated, we need to recheck their state.
gpu.dirty.ResetVertexArrays();
}
+ gpu.dirty.memory_general = false;
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -637,106 +637,33 @@ void RasterizerOpenGL::DrawPrelude() {
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
-}
-
-struct DrawParams {
- bool is_indexed{};
- bool is_instanced{};
- GLenum primitive_mode{};
- GLint count{};
- GLint base_vertex{};
-
- // Indexed settings
- GLenum index_format{};
- GLintptr index_buffer_offset{};
-
- // Instanced setting
- GLint num_instances{};
- GLint base_instance{};
-
- void DispatchDraw() {
- if (is_indexed) {
- const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
- if (is_instanced) {
- glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
- index_buffer_ptr, num_instances,
- base_vertex, base_instance);
- } else {
- glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
- base_vertex);
- }
- } else {
- if (is_instanced) {
- glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, count, num_instances,
- base_instance);
- } else {
- glDrawArrays(primitive_mode, base_vertex, count);
- }
- }
- }
-};
-
-bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
- accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
- MICROPROFILE_SCOPE(OpenGL_Drawing);
-
- DrawPrelude();
-
- auto& maxwell3d = system.GPU().Maxwell3D();
- const auto& regs = maxwell3d.regs;
- const auto current_instance = maxwell3d.state.current_instance;
- DrawParams draw_call{};
- draw_call.is_indexed = is_indexed;
- draw_call.num_instances = static_cast<GLint>(1);
- draw_call.base_instance = static_cast<GLint>(current_instance);
- draw_call.is_instanced = current_instance > 0;
- draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
- if (draw_call.is_indexed) {
- draw_call.count = static_cast<GLint>(regs.index_array.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
- draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- draw_call.index_buffer_offset = index_buffer_offset;
+ const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+ const GLsizei num_instances =
+ static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+ if (is_indexed) {
+ const GLenum index_format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+ const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+ glDrawElementsInstancedBaseVertexBaseInstance(
+ primitive_mode, num_vertices, index_format,
+ reinterpret_cast<const void*>(index_buffer_offset), num_instances, base_vertex,
+ base_instance);
} else {
- draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
+ const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+ glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices, num_instances,
+ base_instance);
}
- draw_call.DispatchDraw();
+}
- maxwell3d.dirty.memory_general = false;
- accelerate_draw = AccelDraw::Disabled;
+bool RasterizerOpenGL::DrawBatch(bool is_indexed) {
+ Draw(is_indexed, false);
return true;
}
bool RasterizerOpenGL::DrawMultiBatch(bool is_indexed) {
- accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
-
- MICROPROFILE_SCOPE(OpenGL_Drawing);
-
- DrawPrelude();
-
- auto& maxwell3d = system.GPU().Maxwell3D();
- const auto& regs = maxwell3d.regs;
- const auto& draw_setup = maxwell3d.mme_draw;
- DrawParams draw_call{};
- draw_call.is_indexed = is_indexed;
- draw_call.num_instances = static_cast<GLint>(draw_setup.instance_count);
- draw_call.base_instance = static_cast<GLint>(regs.vb_base_instance);
- draw_call.is_instanced = draw_setup.instance_count > 1;
- draw_call.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
- if (draw_call.is_indexed) {
- draw_call.count = static_cast<GLint>(regs.index_array.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vb_element_base);
- draw_call.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- draw_call.index_buffer_offset = index_buffer_offset;
- } else {
- draw_call.count = static_cast<GLint>(regs.vertex_buffer.count);
- draw_call.base_vertex = static_cast<GLint>(regs.vertex_buffer.first);
- }
- draw_call.DispatchDraw();
-
- maxwell3d.dirty.memory_general = false;
- accelerate_draw = AccelDraw::Disabled;
+ Draw(is_indexed, true);
return true;
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 6a27cf497..0501f3828 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -103,7 +103,7 @@ private:
std::size_t size);
/// Syncs all the state, shaders, render targets and textures setting before a draw call.
- void DrawPrelude();
+ void Draw(bool is_indexed, bool is_instanced);
/// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
@@ -220,12 +220,7 @@ private:
GLintptr SetupIndexBuffer();
- GLintptr index_buffer_offset;
-
void SetupShaders(GLenum primitive_mode);
-
- enum class AccelDraw { Disabled, Arrays, Indexed };
- AccelDraw accelerate_draw = AccelDraw::Disabled;
};
} // namespace OpenGL
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index d419e9c45..3b391d3e6 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -10,8 +10,80 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::ShfType;
+using Tegra::Shader::ShfXmode;
+
+namespace {
+
+Node IsFull(Node shift) {
+ return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
+}
+
+Node Shift(OperationCode opcode, Node value, Node shift) {
+ Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32));
+ Node shifted = Operation(opcode, move(value), shift);
+ return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
+}
+
+Node ClampShift(Node shift, s32 size = 32) {
+ shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
+ return Operation(OperationCode::IMin, move(shift), Immediate(size));
+}
+
+Node WrapShift(Node shift, s32 size = 32) {
+ return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
+}
+
+Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+ // These values are used when the shift value is less than 32
+ Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
+ Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
+ Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
+
+ if (type == ShfType::Bits32) {
+ // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
+ return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
+ }
+
+ // And these when it's larger than or 32
+ const bool is_signed = type == ShfType::S64;
+ const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
+ Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
+ Node greater = Shift(opcode, high, move(reduced));
+
+ Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+ Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+
+ Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
+ return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
+}
+
+Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
+ // These values are used when the shift value is less than 32
+ Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
+ Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
+ Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
+
+ if (type == ShfType::Bits32) {
+ // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
+ return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
+ }
+
+ // And these when it's larger than or 32
+ Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
+ Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
+
+ Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
+ Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
+
+ Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
+ return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
+}
+
+} // Anonymous namespace
u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -28,29 +100,48 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
}
}();
- switch (opcode->get().GetId()) {
+ switch (const auto opid = opcode->get().GetId(); opid) {
case OpCode::Id::SHR_C:
case OpCode::Id::SHR_R:
case OpCode::Id::SHR_IMM: {
- if (instr.shr.wrap) {
- op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
- } else {
- op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
- op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
- }
+ op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));
Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
- std::move(op_a), std::move(op_b));
+ move(op_a), move(op_b));
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, std::move(value));
+ SetRegister(bb, instr.gpr0, move(value));
break;
}
case OpCode::Id::SHL_C:
case OpCode::Id::SHL_R:
case OpCode::Id::SHL_IMM: {
- const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
+ Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetRegister(bb, instr.gpr0, move(value));
+ break;
+ }
+ case OpCode::Id::SHF_RIGHT_R:
+ case OpCode::Id::SHF_RIGHT_IMM:
+ case OpCode::Id::SHF_LEFT_R:
+ case OpCode::Id::SHF_LEFT_IMM: {
+ UNIMPLEMENTED_IF(instr.generates_cc);
+ UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
+ static_cast<int>(instr.shf.xmode.Value()));
+
+ if (instr.is_b_imm) {
+ op_b = Immediate(static_cast<u32>(instr.shf.immediate));
+ }
+ const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
+ Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);
+
+ Node negated_shift = Operation(OperationCode::INegate, shift);
+ Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));
+
+ const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
+ Node value = (is_right ? ShiftRight : ShiftLeft)(
+ move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);
+
+ SetRegister(bb, instr.gpr0, move(value));
break;
}
default: