summary refs log tree commit diff stats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/engine_upload.cpp 8
-rw-r--r-- src/video_core/engines/engine_upload.h 4
-rw-r--r-- src/video_core/engines/fermi_2d.cpp 90
-rw-r--r-- src/video_core/engines/fermi_2d.h 331
-rw-r--r-- src/video_core/engines/kepler_compute.cpp 26
-rw-r--r-- src/video_core/engines/kepler_compute.h 5
-rw-r--r-- src/video_core/engines/kepler_memory.cpp 4
-rw-r--r-- src/video_core/engines/kepler_memory.h 2
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 321
-rw-r--r-- src/video_core/engines/maxwell_3d.h 201
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 11
-rw-r--r-- src/video_core/engines/maxwell_dma.h 16
-rw-r--r-- src/video_core/engines/shader_bytecode.h 192
-rw-r--r-- src/video_core/engines/shader_header.h 13
14 files changed, 667 insertions, 557 deletions
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index d44ad0cd8..71d7e1473 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -11,16 +11,16 @@
namespace Tegra::Engines::Upload {
-State::State(MemoryManager& memory_manager, Registers& regs)
- : regs{regs}, memory_manager{memory_manager} {}
+State::State(MemoryManager& memory_manager_, Registers& regs_) // Parameters gain a trailing underscore so they no longer share names with the members they initialize.
+ : regs{regs_}, memory_manager{memory_manager_} {} // Initializes the regs and memory_manager members from the arguments.
State::~State() = default;
-void State::ProcessExec(const bool is_linear) {
+void State::ProcessExec(const bool is_linear_) { // Resets the upload bookkeeping for a new transfer and records the requested is_linear flag.
write_offset = 0; // Reset the write offset.
copy_size = regs.line_length_in * regs.line_count; // Size is line length times line count, both read from the registers.
inner_buffer.resize(copy_size); // Resize inner_buffer to copy_size elements.
- this->is_linear = is_linear;
+ is_linear = is_linear_; // The renamed parameter removes the need for this-> to disambiguate the member.
}
void State::ProcessData(const u32 data, const bool is_last_call) {
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 462da419e..1c7f1effa 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -54,10 +54,10 @@ struct Registers {
class State {
public:
- State(MemoryManager& memory_manager, Registers& regs);
+ explicit State(MemoryManager& memory_manager_, Registers& regs_);
~State();
- void ProcessExec(bool is_linear);
+ void ProcessExec(bool is_linear_);
void ProcessData(u32 data, bool is_last_call);
private:
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 9409c4075..a01d334ad 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,11 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D() = default;
+Fermi2D::Fermi2D() { // Replaces the defaulted constructor so that two registers start with a non-zero value.
+ // Nvidia's OpenGL driver seems to assume these values
+ regs.src.depth = 1; // Blit() flags a source depth other than one as unimplemented.
+ regs.dst.depth = 1;
+}
Fermi2D::~Fermi2D() = default;
@@ -21,79 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { // Stores the argument in reg_array and starts a blit when the trigger register is written.
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Fermi2D register, increase the size of the Regs structure");
-
regs.reg_array[method] = method_argument;
- switch (method) {
- // Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
- // so trigger on the second 32-bit write.
- case FERMI2D_REG_INDEX(blit_src_y) + 1: {
- HandleSurfaceCopy();
- break;
- }
+ if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) { // src_y0 is 64-bit and the last member of pixels_from_memory; +1 selects its second 32-bit write (presumably FERMI2D_REG_INDEX yields a 32-bit word index — confirm).
+ Blit();
}
}
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) { // Forwards each of the `amount` arguments to CallMethod for the same method.
- for (std::size_t i = 0; i < amount; i++) {
- CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
+ for (u32 i = 0; i < amount; ++i) { // The index is now u32 like methods_pending, so the static_cast is no longer needed.
+ CallMethod(method, base_start[i], methods_pending - i <= 1); // is_last_call is true when at most one method remains pending.
}
}
-static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
- const u32 line_a = src_2 - src_1;
- const u32 line_b = dst_2 - dst_1;
- const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
- return {line_b - (excess * line_b) / line_a, excess};
-}
-
-void Fermi2D::HandleSurfaceCopy() {
- LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
- static_cast<u32>(regs.operation));
+void Fermi2D::Blit() { // Builds a Config from the pixels_from_memory registers and passes the copy to the rasterizer.
+ LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
+ regs.src.Address(), regs.dst.Address());
- // TODO(Subv): Only raw copies are implemented.
- ASSERT(regs.operation == Operation::SrcCopy);
+ UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy"); // Checks below flag anything other than an unclipped SrcCopy on layer 0 with source depth 1.
+ UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
+ UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
+ UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
+ UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
- const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
- const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
- u32 src_blit_x2, src_blit_y2;
- if (regs.blit_control.origin == Origin::Corner) {
- src_blit_x2 =
- static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
- src_blit_y2 =
- static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
- } else {
- src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
- src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
- }
- u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
- u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
- const auto [new_dst_w, src_excess_x] =
- DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
- const auto [new_dst_h, src_excess_y] =
- DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
- dst_blit_x2 = new_dst_w + regs.blit_dst_x;
- src_blit_x2 = src_blit_x2 - src_excess_x;
- dst_blit_y2 = new_dst_h + regs.blit_dst_y;
- src_blit_y2 = src_blit_y2 - src_excess_y;
- const auto [new_src_w, dst_excess_x] =
- DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
- const auto [new_src_h, dst_excess_y] =
- DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
- src_blit_x2 = new_src_w + src_blit_x1;
- dst_blit_x2 = dst_blit_x2 - dst_excess_x;
- src_blit_y2 = new_src_h + src_blit_y1;
- dst_blit_y2 = dst_blit_y2 - dst_excess_y;
- const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
- const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
- dst_blit_y2};
- const Config copy_config{
+ const auto& args = regs.pixels_from_memory; // Short alias for the register group that supplies every coordinate below.
+ const Config config{
.operation = regs.operation,
- .filter = regs.blit_control.filter,
- .src_rect = src_rect,
- .dst_rect = dst_rect,
+ .filter = args.sample_mode.filter,
+ .dst_x0 = args.dst_x0,
+ .dst_y0 = args.dst_y0,
+ .dst_x1 = args.dst_x0 + args.dst_width, // End coordinate is origin plus extent.
+ .dst_y1 = args.dst_y0 + args.dst_height,
+ .src_x0 = static_cast<s32>(args.src_x0 >> 32), // Keeps only the upper 32 bits of the 64-bit register (presumably the integer part of a 32.32 fixed-point value — confirm).
+ .src_y0 = static_cast<s32>(args.src_y0 >> 32),
+ .src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32), // Start plus step times destination width, then the upper 32 bits.
+ .src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
};
- if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+ if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) { // A false return from the rasterizer is reported as unimplemented.
UNIMPLEMENTED();
}
}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0909709ec..81522988e 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -53,8 +53,8 @@ public:
};
enum class Filter : u32 { // Value type of the one-bit sample_mode.filter field that Blit() copies into Config::filter.
- PointSample = 0, // Nearest
- Linear = 1,
+ Point = 0, // Renamed from PointSample.
+ Bilinear = 1, // Renamed from Linear.
};
enum class Operation : u32 {
@@ -67,88 +67,235 @@ public:
BlendPremult = 6,
};
- struct Regs {
- static constexpr std::size_t NUM_REGS = 0x258;
+ enum class MemoryLayout : u32 { // Value type of Surface::linear.
+ BlockLinear = 0,
+ Pitch = 1,
+ };
- struct Surface {
- RenderTargetFormat format;
- BitField<0, 1, u32> linear;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- };
- u32 depth;
- u32 layer;
- u32 pitch;
- u32 width;
- u32 height;
- u32 address_high;
- u32 address_low;
-
- GPUVAddr Address() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
-
- u32 BlockWidth() const {
- return block_width.Value();
- }
-
- u32 BlockHeight() const {
- return block_height.Value();
- }
-
- u32 BlockDepth() const {
- return block_depth.Value();
- }
- };
- static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+ enum class CpuIndexWrap : u32 { // Value type of the pixels_from_cpu_index_wrap register.
+ Wrap = 0,
+ NoWrap = 1,
+ };
+ struct Surface { // Register layout describing one surface; used for both the dst and src members of Regs.
+ RenderTargetFormat format;
+ MemoryLayout linear; // Previously a 1-bit BitField; now a whole MemoryLayout word.
union {
- struct {
- INSERT_UNION_PADDING_WORDS(0x80);
+ BitField<0, 4, u32> block_width; // Three 4-bit fields packed into one word at bits 0, 4 and 8.
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ };
u32 depth;
u32 layer;
u32 pitch;
u32 width;
u32 height;
+ u32 addr_upper; // Renamed from address_high.
+ u32 addr_lower; // Renamed from address_low.
+
+ [[nodiscard]] constexpr GPUVAddr Address() const noexcept { // Joins addr_upper (shifted left by 32) with addr_lower.
+ return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
+ }
+ };
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); // 0x28 bytes equals the ten 32-bit words declared above.
- Surface dst;
+ enum class SectorPromotion : u32 { // Value type of the pixels_from_memory_sector_promotion register.
+ NoPromotion = 0,
+ PromoteTo2V = 1,
+ PromoteTo2H = 2,
+ PromoteTo4 = 3,
+ };
+
+ enum class NumTpcs : u32 { // Value type of the num_tpcs register.
+ All = 0,
+ One = 1,
+ };
- INSERT_UNION_PADDING_WORDS(2);
+ enum class RenderEnableMode : u32 { // Value type of the render_enable_mode register.
+ False = 0,
+ True = 1,
+ Conditional = 2,
+ RenderIfEqual = 3,
+ RenderIfNotEqual = 4,
+ };
- Surface src;
+ enum class ColorKeyFormat : u32 { // Value type of the 3-bit color_key_format field.
+ A16R56G6B5 = 0, // NOTE(review): "R56" looks like a typo for "R5" — confirm the intended name.
+ A1R5G55B5 = 1, // NOTE(review): "G55" looks like a typo for "G5" (compare A1R5G5B5 in MonochromePatternColorFormat) — confirm.
+ A8R8G8B8 = 2,
+ A2R10G10B10 = 3,
+ Y8 = 4,
+ Y16 = 5,
+ Y32 = 6,
+ };
- INSERT_UNION_PADDING_WORDS(0x15);
+ union Beta4 { // Type of the beta4 register: four 8-bit fields sharing one 32-bit word.
+ BitField<0, 8, u32> b; // Bits 0-7.
+ BitField<8, 8, u32> g; // Bits 8-15.
+ BitField<16, 8, u32> r; // Bits 16-23.
+ BitField<24, 8, u32> a; // Bits 24-31.
+ };
- Operation operation;
+ struct Point { // Pair of 32-bit coordinates; element type of render_solid.prim_point.
+ u32 x;
+ u32 y;
+ };
- INSERT_UNION_PADDING_WORDS(0x177);
+ enum class PatternSelect : u32 { // Value type of the 2-bit pattern_select field.
+ MonoChrome8x8 = 0,
+ MonoChrome64x1 = 1,
+ MonoChrome1x64 = 2,
+ Color = 3,
+ };
+ enum class NotifyType : u32 { // Value type of the notify register.
+ WriteOnly = 0,
+ WriteThenAwaken = 1,
+ };
+
+ enum class MonochromePatternColorFormat : u32 { // Value type of the 3-bit monochrome_pattern.color_format field.
+ A8X8R8G6B5 = 0,
+ A1R5G5B5 = 1,
+ A8R8G8B8 = 2,
+ A8Y8 = 3,
+ A8X8Y16 = 4,
+ Y32 = 5,
+ };
+
+ enum class MonochromePatternFormat : u32 { // Value type of the 1-bit monochrome_pattern.format field.
+ CGA6_M1 = 0,
+ LE_M1 = 1,
+ };
+
+ union Regs { // Overlays the named register layout on reg_array, so a register can be reached by name or by index.
+ static constexpr std::size_t NUM_REGS = 0x258; // Element count of reg_array; CallMethod asserts method < NUM_REGS.
+ struct {
+ u32 object;
+ INSERT_UNION_PADDING_WORDS(0x3F); // Padding up to no_operation, which is asserted at byte offset 0x100.
+ u32 no_operation;
+ NotifyType notify;
+ INSERT_UNION_PADDING_WORDS(0x2);
+ u32 wait_for_idle;
+ INSERT_UNION_PADDING_WORDS(0xB);
+ u32 pm_trigger;
+ INSERT_UNION_PADDING_WORDS(0xF);
+ u32 context_dma_notify;
+ u32 dst_context_dma;
+ u32 src_context_dma;
+ u32 semaphore_context_dma;
+ INSERT_UNION_PADDING_WORDS(0x1C);
+ Surface dst; // Asserted at byte offset 0x200.
+ CpuIndexWrap pixels_from_cpu_index_wrap;
+ u32 kind2d_check_enable;
+ Surface src; // Asserted at byte offset 0x230.
+ SectorPromotion pixels_from_memory_sector_promotion;
+ INSERT_UNION_PADDING_WORDS(0x1);
+ NumTpcs num_tpcs;
+ u32 render_enable_addr_upper;
+ u32 render_enable_addr_lower;
+ RenderEnableMode render_enable_mode;
+ INSERT_UNION_PADDING_WORDS(0x4);
+ u32 clip_x0;
+ u32 clip_y0;
+ u32 clip_width;
+ u32 clip_height;
+ BitField<0, 1, u32> clip_enable; // Blit() flags a non-zero value as unimplemented.
+ BitField<0, 3, ColorKeyFormat> color_key_format;
+ u32 color_key;
+ BitField<0, 1, u32> color_key_enable;
+ BitField<0, 8, u32> rop;
+ u32 beta1;
+ Beta4 beta4;
+ Operation operation; // Asserted at byte offset 0x2AC; Blit() only handles Operation::SrcCopy.
+ union {
+ BitField<0, 6, u32> x;
+ BitField<8, 6, u32> y;
+ } pattern_offset;
+ BitField<0, 2, PatternSelect> pattern_select;
+ INSERT_UNION_PADDING_WORDS(0xC);
+ struct {
+ BitField<0, 3, MonochromePatternColorFormat> color_format;
+ BitField<0, 1, MonochromePatternFormat> format;
+ u32 color0;
+ u32 color1;
+ u32 pattern0;
+ u32 pattern1;
+ } monochrome_pattern; // Asserted at byte offset 0x2E8.
+ struct {
+ std::array<u32, 0x40> X8R8G8B8;
+ std::array<u32, 0x20> R5G6B5;
+ std::array<u32, 0x20> X1R5G5B5;
+ std::array<u32, 0x10> Y8;
+ } color_pattern; // Asserted at byte offset 0x300.
+ INSERT_UNION_PADDING_WORDS(0x10);
+ struct {
+ u32 prim_mode;
+ u32 prim_color_format;
+ u32 prim_color;
+ u32 line_tie_break_bits;
+ INSERT_UNION_PADDING_WORDS(0x14);
+ u32 prim_point_xy;
+ INSERT_UNION_PADDING_WORDS(0x7);
+ std::array<Point, 0x40> prim_point;
+ } render_solid; // Asserted at byte offset 0x580.
+ struct {
+ u32 data_type;
+ u32 color_format;
+ u32 index_format;
+ u32 mono_format;
+ u32 wrap;
+ u32 color0;
+ u32 color1;
+ u32 mono_opacity;
+ INSERT_UNION_PADDING_WORDS(0x6);
+ u32 src_width;
+ u32 src_height;
+ u32 dx_du_frac;
+ u32 dx_du_int;
+ u32 dx_dv_frac; // NOTE(review): naming is inconsistent with the next field (dx_dv_frac vs dy_dv_int) — confirm which is intended.
+ u32 dy_dv_int;
+ u32 dst_x0_frac;
+ u32 dst_x0_int;
+ u32 dst_y0_frac;
+ u32 dst_y0_int;
+ u32 data;
+ } pixels_from_cpu; // Asserted at byte offset 0x800.
+ INSERT_UNION_PADDING_WORDS(0x3);
+ u32 big_endian_control;
+ INSERT_UNION_PADDING_WORDS(0x3);
+ struct { // Start of the pixels_from_memory group, asserted at byte offset 0x880.
+ BitField<0, 3, u32> block_shape;
+ BitField<0, 5, u32> corral_size;
+ BitField<0, 1, u32> safe_overlap;
union {
- u32 raw;
BitField<0, 1, Origin> origin;
BitField<4, 1, Filter> filter;
- } blit_control;
-
+ } sample_mode; // Renamed from blit_control; the raw member is dropped.
INSERT_UNION_PADDING_WORDS(0x8);
-
- u32 blit_dst_x;
- u32 blit_dst_y;
- u32 blit_dst_width;
- u32 blit_dst_height;
- u64 blit_du_dx;
- u64 blit_dv_dy;
- u64 blit_src_x;
- u64 blit_src_y;
-
- INSERT_UNION_PADDING_WORDS(0x21);
- };
- std::array<u32, NUM_REGS> reg_array;
+ s32 dst_x0; // Destination and source fields are now signed (previously u32/u64).
+ s32 dst_y0;
+ s32 dst_width;
+ s32 dst_height;
+ s64 du_dx; // Blit() multiplies this by dst_width, adds src_x0 and keeps the upper 32 bits.
+ s64 dv_dy;
+ s64 src_x0;
+ s64 src_y0; // CallMethod starts Blit() on the second 32-bit write to this register.
+ } pixels_from_memory;
};
+ std::array<u32, NUM_REGS> reg_array; // Flat view written by CallMethod.
} regs{};
struct Config { // Blit description that Blit() fills in and passes to AccelerateSurfaceCopy.
- Operation operation{};
- Filter filter{};
- Common::Rectangle<u32> src_rect;
- Common::Rectangle<u32> dst_rect;
+ Operation operation; // Default member initializers are dropped; Blit() sets every field with designated initializers.
+ Filter filter;
+ s32 dst_x0; // The two rectangles are replaced by explicit signed corner coordinates.
+ s32 dst_y0;
+ s32 dst_x1;
+ s32 dst_y1;
+ s32 src_x0;
+ s32 src_y0;
+ s32 src_x1;
+ s32 src_y1;
};
private:
@@ -156,25 +303,49 @@ private:
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
- void HandleSurfaceCopy();
+ void Blit();
};
#define ASSERT_REG_POSITION(field_name, position) \
- static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
+ static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
"Field " #field_name " has invalid position") // Compile-time check of a field's offset in Regs; position is now compared as a byte offset (the old form multiplied it by 4).
-ASSERT_REG_POSITION(dst, 0x80);
-ASSERT_REG_POSITION(src, 0x8C);
-ASSERT_REG_POSITION(operation, 0xAB);
-ASSERT_REG_POSITION(blit_control, 0x223);
-ASSERT_REG_POSITION(blit_dst_x, 0x22c);
-ASSERT_REG_POSITION(blit_dst_y, 0x22d);
-ASSERT_REG_POSITION(blit_dst_width, 0x22e);
-ASSERT_REG_POSITION(blit_dst_height, 0x22f);
-ASSERT_REG_POSITION(blit_du_dx, 0x230);
-ASSERT_REG_POSITION(blit_dv_dy, 0x232);
-ASSERT_REG_POSITION(blit_src_x, 0x234);
-ASSERT_REG_POSITION(blit_src_y, 0x236);
+ASSERT_REG_POSITION(object, 0x0);
+ASSERT_REG_POSITION(no_operation, 0x100);
+ASSERT_REG_POSITION(notify, 0x104);
+ASSERT_REG_POSITION(wait_for_idle, 0x110);
+ASSERT_REG_POSITION(pm_trigger, 0x140);
+ASSERT_REG_POSITION(context_dma_notify, 0x180);
+ASSERT_REG_POSITION(dst_context_dma, 0x184);
+ASSERT_REG_POSITION(src_context_dma, 0x188);
+ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
+ASSERT_REG_POSITION(dst, 0x200); // Same location as the old word index 0x80 (0x80 * 4).
+ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
+ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
+ASSERT_REG_POSITION(src, 0x230); // Same location as the old word index 0x8C.
+ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
+ASSERT_REG_POSITION(num_tpcs, 0x260);
+ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
+ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
+ASSERT_REG_POSITION(clip_x0, 0x280);
+ASSERT_REG_POSITION(clip_y0, 0x284);
+ASSERT_REG_POSITION(clip_width, 0x288);
+ASSERT_REG_POSITION(clip_height, 0x28c);
+ASSERT_REG_POSITION(clip_enable, 0x290);
+ASSERT_REG_POSITION(color_key_format, 0x294);
+ASSERT_REG_POSITION(color_key, 0x298);
+ASSERT_REG_POSITION(rop, 0x2A0);
+ASSERT_REG_POSITION(beta1, 0x2A4);
+ASSERT_REG_POSITION(beta4, 0x2A8);
+ASSERT_REG_POSITION(operation, 0x2AC); // Same location as the old word index 0xAB.
+ASSERT_REG_POSITION(pattern_offset, 0x2B0);
+ASSERT_REG_POSITION(pattern_select, 0x2B4);
+ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
+ASSERT_REG_POSITION(color_pattern, 0x300);
+ASSERT_REG_POSITION(render_solid, 0x580);
+ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
+ASSERT_REG_POSITION(big_endian_control, 0x870);
+ASSERT_REG_POSITION(pixels_from_memory, 0x880);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 898370739..ba387506e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
}
}
-Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
- const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
- ASSERT(cbuf_mask[regs.tex_cb_index]);
-
- const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
- ASSERT(texinfo.Address() != 0);
-
- const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
- ASSERT(address < texinfo.Address() + texinfo.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
- return GetTextureInfo(tex_handle);
-}
-
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
- return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
ASSERT(stage == ShaderType::Compute);
const auto& buffer = launch_description.const_buffer_config[const_buffer];
@@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { // Builds a SamplerDescriptor from the TIC and TSC entries selected by the handle.
const Texture::TextureHandle tex_handle{handle};
- const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
- result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id); // Entries are fetched directly because GetTextureInfo is removed by this change.
+ const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+ result.is_shadow.Assign(tsc.depth_compare_enabled.Value()); // is_shadow takes the TSC entry's depth_compare_enabled value.
return result;
}
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 7f2500aab..51a041202 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -209,11 +209,6 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
- Texture::FullTextureInfo GetTexture(std::size_t offset) const;
-
- /// Given a texture handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index dc71b2eec..9911140e9 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -14,8 +14,8 @@
namespace Tegra::Engines {
-KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
- : system{system}, upload_state{memory_manager, regs.upload} {}
+KeplerMemory::KeplerMemory(Core::System& system_, MemoryManager& memory_manager) // Only system is renamed; it was the parameter whose name matched the member it initializes.
+ : system{system_}, upload_state{memory_manager, regs.upload} {} // upload_state is constructed from the memory manager and regs.upload.
KeplerMemory::~KeplerMemory() = default;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index 5b7f71a00..62483589e 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -35,7 +35,7 @@ namespace Tegra::Engines {
class KeplerMemory final : public EngineInterface {
public:
- KeplerMemory(Core::System& system, MemoryManager& memory_manager);
+ explicit KeplerMemory(Core::System& system_, MemoryManager& memory_manager);
~KeplerMemory();
/// Write the value to the register identified by method.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 57ebc785f..9be651e24 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <cinttypes>
#include <cstring>
#include <optional>
#include "common/assert.h"
@@ -124,6 +123,116 @@ void Maxwell3D::InitializeRegisterDefaults() {
mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
}
+void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call) { // Macro path shared by CallMethod and CallMultiMethod: collects parameters and runs the macro on the last call.
+ if (executing_macro == 0) { // Zero means no macro is being collected; CallMacroMethod resets it to zero.
+ // A macro call must begin by writing the macro method's register, not its argument.
+ ASSERT_MSG((method % 2) == 0,
+ "Can't start macro execution by writing to the ARGS register");
+ executing_macro = method;
+ }
+
+ macro_params.insert(macro_params.end(), base_start, base_start + amount); // Appends all `amount` words in one call; the removed code pushed them one at a time.
+
+ // Call the macro when there are no more parameters in the command buffer
+ if (is_last_call) {
+ CallMacroMethod(executing_macro, macro_params);
+ macro_params.clear(); // Empty the list for the next macro.
+ }
+}
+
+u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { // Returns the value to write for this method, according to the current shadow RAM control mode.
+ // Keep track of the register value in shadow_state when requested.
+ const auto control = shadow_state.shadow_ram_control;
+ if (control == Regs::ShadowRamControl::Track ||
+ control == Regs::ShadowRamControl::TrackWithFilter) { // Both tracking modes record the argument and return it unchanged.
+ shadow_state.reg_array[method] = argument;
+ return argument;
+ }
+ if (control == Regs::ShadowRamControl::Replay) { // Replay ignores the incoming argument and returns the recorded value.
+ return shadow_state.reg_array[method];
+ }
+ return argument; // Any other mode passes the argument through without recording it.
+}
+
+void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { // Writes the register and raises dirty flags, but only when the value changes.
+ if (regs.reg_array[method] == argument) { // Unchanged value: the register and the dirty flags are left untouched.
+ return;
+ }
+ regs.reg_array[method] = argument;
+
+ for (const auto& table : dirty.tables) {
+ dirty.flags[table[method]] = true; // Each table maps the method to the index of a flag to set.
+ }
+}
+
+void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument,
+ bool is_last_call) { // Runs the side effect tied to a register write; methods with no case leave the switch and do nothing.
+ switch (method) {
+ case MAXWELL3D_REG_INDEX(wait_for_idle):
+ return rasterizer->WaitForIdle();
+ case MAXWELL3D_REG_INDEX(shadow_ram_control):
+ shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(nonshadow_argument); // Uses the value as written (CallMethod passes method_argument), not the ProcessShadowRam result.
+ return;
+ case MAXWELL3D_REG_INDEX(macros.data):
+ return macro_engine->AddCode(regs.macros.upload_address, argument);
+ case MAXWELL3D_REG_INDEX(macros.bind):
+ return ProcessMacroBind(argument);
+ case MAXWELL3D_REG_INDEX(firmware[4]):
+ return ProcessFirmwareCall4();
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
+ return StartCBData(method); // All sixteen cb_data registers share one handler, which receives the method index.
+ case MAXWELL3D_REG_INDEX(cb_bind[0]):
+ return ProcessCBBind(0); // The argument to ProcessCBBind is the cb_bind array index.
+ case MAXWELL3D_REG_INDEX(cb_bind[1]):
+ return ProcessCBBind(1);
+ case MAXWELL3D_REG_INDEX(cb_bind[2]):
+ return ProcessCBBind(2);
+ case MAXWELL3D_REG_INDEX(cb_bind[3]):
+ return ProcessCBBind(3);
+ case MAXWELL3D_REG_INDEX(cb_bind[4]):
+ return ProcessCBBind(4);
+ case MAXWELL3D_REG_INDEX(draw.vertex_end_gl):
+ return DrawArrays();
+ case MAXWELL3D_REG_INDEX(clear_buffers):
+ return ProcessClearBuffers();
+ case MAXWELL3D_REG_INDEX(query.query_get):
+ return ProcessQueryGet();
+ case MAXWELL3D_REG_INDEX(condition.mode):
+ return ProcessQueryCondition();
+ case MAXWELL3D_REG_INDEX(counter_reset):
+ return ProcessCounterReset();
+ case MAXWELL3D_REG_INDEX(sync_info):
+ return ProcessSyncPoint();
+ case MAXWELL3D_REG_INDEX(exec_upload):
+ return upload_state.ProcessExec(regs.exec_upload.linear != 0);
+ case MAXWELL3D_REG_INDEX(data_upload):
+ upload_state.ProcessData(argument, is_last_call);
+ if (is_last_call) { // OnMemoryWrite runs only after the final data word has been processed.
+ OnMemoryWrite();
+ }
+ return;
+ case MAXWELL3D_REG_INDEX(fragment_barrier):
+ return rasterizer->FragmentBarrier();
+ case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
+ return rasterizer->TiledCacheBarrier();
+ }
+}
+
void Maxwell3D::CallMacroMethod(u32 method, const std::vector<u32>& parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -157,142 +266,16 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
- // We're trying to execute a macro
- if (executing_macro == 0) {
- // A macro call must begin by writing the macro method's register, not its argument.
- ASSERT_MSG((method % 2) == 0,
- "Can't start macro execution by writing to the ARGS register");
- executing_macro = method;
- }
-
- macro_params.push_back(method_argument);
-
- // Call the macro when there are no more parameters in the command buffer
- if (is_last_call) {
- CallMacroMethod(executing_macro, macro_params);
- macro_params.clear();
- }
+ ProcessMacro(method, &method_argument, 1, is_last_call);
return;
}
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid Maxwell3D register, increase the size of the Regs structure");
- u32 arg = method_argument;
- // Keep track of the register value in shadow_state when requested.
- if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track ||
- shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) {
- shadow_state.reg_array[method] = arg;
- } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) {
- arg = shadow_state.reg_array[method];
- }
-
- if (regs.reg_array[method] != arg) {
- regs.reg_array[method] = arg;
-
- for (const auto& table : dirty.tables) {
- dirty.flags[table[method]] = true;
- }
- }
-
- switch (method) {
- case MAXWELL3D_REG_INDEX(wait_for_idle): {
- rasterizer->WaitForIdle();
- break;
- }
- case MAXWELL3D_REG_INDEX(shadow_ram_control): {
- shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_argument);
- break;
- }
- case MAXWELL3D_REG_INDEX(macros.data): {
- macro_engine->AddCode(regs.macros.upload_address, arg);
- break;
- }
- case MAXWELL3D_REG_INDEX(macros.bind): {
- ProcessMacroBind(arg);
- break;
- }
- case MAXWELL3D_REG_INDEX(firmware[4]): {
- ProcessFirmwareCall4();
- break;
- }
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[3]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[4]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[5]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[6]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[7]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[8]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[9]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[10]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[11]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
- StartCBData(method);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[0]): {
- ProcessCBBind(0);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[1]): {
- ProcessCBBind(1);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[2]): {
- ProcessCBBind(2);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[3]): {
- ProcessCBBind(3);
- break;
- }
- case MAXWELL3D_REG_INDEX(cb_bind[4]): {
- ProcessCBBind(4);
- break;
- }
- case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): {
- DrawArrays();
- break;
- }
- case MAXWELL3D_REG_INDEX(clear_buffers): {
- ProcessClearBuffers();
- break;
- }
- case MAXWELL3D_REG_INDEX(query.query_get): {
- ProcessQueryGet();
- break;
- }
- case MAXWELL3D_REG_INDEX(condition.mode): {
- ProcessQueryCondition();
- break;
- }
- case MAXWELL3D_REG_INDEX(counter_reset): {
- ProcessCounterReset();
- break;
- }
- case MAXWELL3D_REG_INDEX(sync_info): {
- ProcessSyncPoint();
- break;
- }
- case MAXWELL3D_REG_INDEX(exec_upload): {
- upload_state.ProcessExec(regs.exec_upload.linear != 0);
- break;
- }
- case MAXWELL3D_REG_INDEX(data_upload): {
- upload_state.ProcessData(arg, is_last_call);
- if (is_last_call) {
- OnMemoryWrite();
- }
- break;
- }
- default:
- break;
- }
+ const u32 argument = ProcessShadowRam(method, method_argument);
+ ProcessDirtyRegisters(method, argument);
+ ProcessMethodCall(method, argument, method_argument, is_last_call);
}
void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
@@ -300,23 +283,7 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
// Methods after 0xE00 are special, they're actually triggers for some microcode that was
// uploaded to the GPU during initialization.
if (method >= MacroRegistersStart) {
- // We're trying to execute a macro
- if (executing_macro == 0) {
- // A macro call must begin by writing the macro method's register, not its argument.
- ASSERT_MSG((method % 2) == 0,
- "Can't start macro execution by writing to the ARGS register");
- executing_macro = method;
- }
-
- for (std::size_t i = 0; i < amount; i++) {
- macro_params.push_back(base_start[i]);
- }
-
- // Call the macro when there are no more parameters in the command buffer
- if (amount == methods_pending) {
- CallMacroMethod(executing_macro, macro_params);
- macro_params.clear();
- }
+ ProcessMacro(method, base_start, amount, amount == methods_pending);
return;
}
switch (method) {
@@ -335,15 +302,14 @@ void Maxwell3D::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[12]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
- case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
+ case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]):
ProcessCBMultiData(method, base_start, amount);
break;
- }
- default: {
+ default:
for (std::size_t i = 0; i < amount; i++) {
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
}
- }
+ break;
}
}
@@ -396,7 +362,7 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) {
}
void Maxwell3D::FlushMMEInlineDraw() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -541,8 +507,7 @@ void Maxwell3D::ProcessCounterReset() {
rasterizer->ResetCounter(QueryType::SamplesPassed);
break;
default:
- LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
- static_cast<int>(regs.counter_reset));
+ LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.counter_reset);
break;
}
}
@@ -557,7 +522,7 @@ void Maxwell3D::ProcessSyncPoint() {
}
void Maxwell3D::DrawArrays() {
- LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
@@ -595,12 +560,12 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
return 0;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
- rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ rasterizer->Query(regs.query.QueryAddress(), QueryType::SamplesPassed,
system.GPU().GetTicks());
return std::nullopt;
default:
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
+ regs.query.query_get.select.Value());
return 1;
}
}
@@ -677,7 +642,7 @@ void Maxwell3D::FinishCBData() {
}
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
- const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
+ const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
@@ -686,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
}
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
- const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
+ const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry;
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry;
}
-Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
- return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
-}
-
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
- const auto stage_index = static_cast<std::size_t>(stage);
- const auto& shader = state.shader_stages[stage_index];
- const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
- ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
- const GPUVAddr tex_info_address =
- tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
-
- ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
-
- return GetTextureInfo(tex_handle);
-}
-
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
}
void Maxwell3D::ProcessClearBuffers() {
- ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
- regs.clear_buffers.R == regs.clear_buffers.B &&
- regs.clear_buffers.R == regs.clear_buffers.A);
-
rasterizer->Clear();
}
@@ -730,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
ASSERT(stage != ShaderType::Compute);
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
- u32 result;
- std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
- return result;
+ return memory_manager.Read<u32>(buffer.address + offset);
}
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
@@ -750,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
- const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
- SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
- result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+ const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
+ const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
+
+ SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
+ result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
return result;
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index bc289c55d..bf9e07c9b 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -438,16 +438,6 @@ public:
DecrWrapOGL = 0x8508,
};
- enum class MemoryLayout : u32 {
- Linear = 0,
- BlockLinear = 1,
- };
-
- enum class InvMemoryLayout : u32 {
- BlockLinear = 0,
- Linear = 1,
- };
-
enum class CounterReset : u32 {
SampleCnt = 0x01,
Unk02 = 0x02,
@@ -589,21 +579,31 @@ public:
NegativeW = 7,
};
+ enum class SamplerIndex : u32 {
+ Independently = 0,
+ ViaHeaderIndex = 1,
+ };
+
+ struct TileMode {
+ union {
+ BitField<0, 4, u32> block_width;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_depth;
+ BitField<12, 1, u32> is_pitch_linear;
+ BitField<16, 1, u32> is_3d;
+ };
+ };
+ static_assert(sizeof(TileMode) == 4);
+
struct RenderTargetConfig {
u32 address_high;
u32 address_low;
u32 width;
u32 height;
Tegra::RenderTargetFormat format;
+ TileMode tile_mode;
union {
- BitField<0, 3, u32> block_width;
- BitField<4, 3, u32> block_height;
- BitField<8, 3, u32> block_depth;
- BitField<12, 1, InvMemoryLayout> type;
- BitField<16, 1, u32> is_3d;
- } memory_layout;
- union {
- BitField<0, 16, u32> layers;
+ BitField<0, 16, u32> depth;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@@ -755,7 +755,11 @@ public:
u32 data_upload;
- INSERT_UNION_PADDING_WORDS(0x44);
+ INSERT_UNION_PADDING_WORDS(0x16);
+
+ u32 force_early_fragment_tests;
+
+ INSERT_UNION_PADDING_WORDS(0x2D);
struct {
union {
@@ -828,7 +832,11 @@ public:
u32 patch_vertices;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0x4);
+
+ u32 fragment_barrier;
+
+ INSERT_UNION_PADDING_WORDS(0x7);
std::array<ScissorTest, NumViewports> scissor_test;
@@ -838,7 +846,15 @@ public:
u32 stencil_back_mask;
u32 stencil_back_func_mask;
- INSERT_UNION_PADDING_WORDS(0xC);
+ INSERT_UNION_PADDING_WORDS(0x5);
+
+ u32 invalidate_texture_data_cache;
+
+ INSERT_UNION_PADDING_WORDS(0x1);
+
+ u32 tiled_cache_barrier;
+
+ INSERT_UNION_PADDING_WORDS(0x4);
u32 color_mask_common;
@@ -862,12 +878,7 @@ public:
u32 address_high;
u32 address_low;
Tegra::DepthFormat format;
- union {
- BitField<0, 4, u32> block_width;
- BitField<4, 4, u32> block_height;
- BitField<8, 4, u32> block_depth;
- BitField<20, 1, InvMemoryLayout> type;
- } memory_layout;
+ TileMode tile_mode;
u32 layer_stride;
GPUVAddr Address() const {
@@ -876,7 +887,18 @@ public:
}
} zeta;
- INSERT_UNION_PADDING_WORDS(0x41);
+ struct {
+ union {
+ BitField<0, 16, u32> x;
+ BitField<16, 16, u32> width;
+ };
+ union {
+ BitField<0, 16, u32> y;
+ BitField<16, 16, u32> height;
+ };
+ } render_area;
+
+ INSERT_UNION_PADDING_WORDS(0x3F);
union {
BitField<0, 4, u32> stencil;
@@ -917,7 +939,7 @@ public:
BitField<25, 3, u32> map_7;
};
- u32 GetMap(std::size_t index) const {
+ u32 Map(std::size_t index) const {
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
map_4, map_5, map_6, map_7};
ASSERT(index < maps.size());
@@ -930,11 +952,13 @@ public:
u32 zeta_width;
u32 zeta_height;
union {
- BitField<0, 16, u32> zeta_layers;
+ BitField<0, 16, u32> zeta_depth;
BitField<16, 1, u32> zeta_volume;
};
- INSERT_UNION_PADDING_WORDS(0x26);
+ SamplerIndex sampler_index;
+
+ INSERT_UNION_PADDING_WORDS(0x25);
u32 depth_test_enable;
@@ -960,6 +984,7 @@ public:
float b;
float a;
} blend_color;
+
INSERT_UNION_PADDING_WORDS(0x4);
struct {
@@ -997,7 +1022,12 @@ public:
float line_width_smooth;
float line_width_aliased;
- INSERT_UNION_PADDING_WORDS(0x1F);
+ INSERT_UNION_PADDING_WORDS(0x1B);
+
+ u32 invalidate_sampler_cache_no_wfi;
+ u32 invalidate_texture_header_cache_no_wfi;
+
+ INSERT_UNION_PADDING_WORDS(0x2);
u32 vb_element_base;
u32 vb_base_instance;
@@ -1041,13 +1071,13 @@ public:
} condition;
struct {
- u32 tsc_address_high;
- u32 tsc_address_low;
- u32 tsc_limit;
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
- GPUVAddr TSCAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
}
} tsc;
@@ -1058,13 +1088,13 @@ public:
u32 line_smooth_enable;
struct {
- u32 tic_address_high;
- u32 tic_address_low;
- u32 tic_limit;
+ u32 address_high;
+ u32 address_low;
+ u32 limit;
- GPUVAddr TICAddress() const {
- return static_cast<GPUVAddr>(
- (static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
}
} tic;
@@ -1393,12 +1423,6 @@ public:
void FlushMMEInlineDraw();
- /// Given a texture handle, returns the TSC and TIC entries.
- Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
-
- /// Returns the texture information for a specific texture in a specific shader stage.
- Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
-
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
@@ -1461,38 +1485,13 @@ public:
private:
void InitializeRegisterDefaults();
- Core::System& system;
- MemoryManager& memory_manager;
-
- VideoCore::RasterizerInterface* rasterizer = nullptr;
-
- /// Start offsets of each macro in macro_memory
- std::array<u32, 0x80> macro_positions = {};
-
- std::array<bool, Regs::NUM_REGS> mme_inline{};
-
- /// Macro method that is currently being executed / being fed parameters.
- u32 executing_macro = 0;
- /// Parameters that have been submitted to the macro call so far.
- std::vector<u32> macro_params;
-
- /// Interpreter for the macro codes uploaded to the GPU.
- std::unique_ptr<MacroEngine> macro_engine;
-
- static constexpr u32 null_cb_data = 0xFFFFFFFF;
- struct {
- std::array<std::array<u32, 0x4000>, 16> buffer;
- u32 current{null_cb_data};
- u32 id{null_cb_data};
- u32 start_pos{};
- u32 counter{};
- } cb_data_state;
+ void ProcessMacro(u32 method, const u32* base_start, u32 amount, bool is_last_call);
- Upload::State upload_state;
+ u32 ProcessShadowRam(u32 method, u32 argument);
- bool execute_on{true};
+ void ProcessDirtyRegisters(u32 method, u32 argument);
- std::array<u8, Regs::NUM_REGS> dirty_pointers{};
+ void ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call);
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
@@ -1502,8 +1501,8 @@ private:
/**
* Call a macro on this engine.
+ *
* @param method Method to call
- * @param num_parameters Number of arguments
* @param parameters Arguments to the method call
*/
void CallMacroMethod(u32 method, const std::vector<u32>& parameters);
@@ -1552,6 +1551,38 @@ private:
/// Returns a query's value or an empty object if the value will be deferred through a cache.
std::optional<u64> GetQueryResult();
+
+ Core::System& system;
+ MemoryManager& memory_manager;
+
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+ /// Start offsets of each macro in macro_memory
+ std::array<u32, 0x80> macro_positions{};
+
+ std::array<bool, Regs::NUM_REGS> mme_inline{};
+
+ /// Macro method that is currently being executed / being fed parameters.
+ u32 executing_macro = 0;
+ /// Parameters that have been submitted to the macro call so far.
+ std::vector<u32> macro_params;
+
+ /// Interpreter for the macro codes uploaded to the GPU.
+ std::unique_ptr<MacroEngine> macro_engine;
+
+ static constexpr u32 null_cb_data = 0xFFFFFFFF;
+ struct CBDataState {
+ std::array<std::array<u32, 0x4000>, 16> buffer;
+ u32 current{null_cb_data};
+ u32 id{null_cb_data};
+ u32 start_pos{};
+ u32 counter{};
+ };
+ CBDataState cb_data_state;
+
+ Upload::State upload_state;
+
+ bool execute_on{true};
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1564,6 +1595,7 @@ ASSERT_REG_POSITION(shadow_ram_control, 0x49);
ASSERT_REG_POSITION(upload, 0x60);
ASSERT_REG_POSITION(exec_upload, 0x6C);
ASSERT_REG_POSITION(data_upload, 0x6D);
+ASSERT_REG_POSITION(force_early_fragment_tests, 0x84);
ASSERT_REG_POSITION(sync_info, 0xB2);
ASSERT_REG_POSITION(tess_mode, 0xC8);
ASSERT_REG_POSITION(tess_level_outer, 0xC9);
@@ -1586,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
ASSERT_REG_POSITION(patch_vertices, 0x373);
+ASSERT_REG_POSITION(fragment_barrier, 0x378);
ASSERT_REG_POSITION(scissor_test, 0x380);
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
+ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
@@ -1597,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
ASSERT_REG_POSITION(zeta, 0x3F8);
+ASSERT_REG_POSITION(render_area, 0x3FD);
ASSERT_REG_POSITION(clear_flags, 0x43E);
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
@@ -1605,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
-ASSERT_REG_POSITION(zeta_layers, 0x48c);
+ASSERT_REG_POSITION(zeta_depth, 0x48c);
+ASSERT_REG_POSITION(sampler_index, 0x48D);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
@@ -1629,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
+ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
+ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index e88290754..ba750748c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -16,8 +16,10 @@ namespace Tegra::Engines {
using namespace Texture;
-MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
- : system{system}, memory_manager{memory_manager} {}
+MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_} {}
+
+MaxwellDMA::~MaxwellDMA() = default;
void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register");
@@ -94,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
}
void MaxwellDMA::CopyBlockLinearToPitch() {
+ UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
@@ -114,8 +117,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const u32 block_depth = src_params.block_size.depth;
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t src_layer_size =
- CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
@@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
}
void MaxwellDMA::CopyPitchToBlockLinear() {
+ UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
+
const auto& dst_params = regs.dst_params;
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
const u32 width = dst_params.width;
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 50f445efc..3c59eeb13 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -72,11 +72,13 @@ public:
struct RenderEnable {
enum class Mode : u32 {
- FALSE = 0,
- TRUE = 1,
- CONDITIONAL = 2,
- RENDER_IF_EQUAL = 3,
- RENDER_IF_NOT_EQUAL = 4,
+ // Note: This uses Pascal case in order to avoid the identifiers
+ // FALSE and TRUE, which are reserved on Darwin.
+ False = 0,
+ True = 1,
+ Conditional = 2,
+ RenderIfEqual = 3,
+ RenderIfNotEqual = 4,
};
PackedGPUVAddr address;
@@ -185,8 +187,8 @@ public:
};
static_assert(sizeof(RemapConst) == 12);
- explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
- ~MaxwellDMA() = default;
+ explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_);
+ ~MaxwellDMA();
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d374b73cf..8b45f1b62 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -32,31 +32,31 @@ struct Register {
constexpr Register() = default;
- constexpr Register(u64 value) : value(value) {}
+ constexpr Register(u64 value_) : value(value_) {}
- constexpr operator u64() const {
+ [[nodiscard]] constexpr operator u64() const {
return value;
}
template <typename T>
- constexpr u64 operator-(const T& oth) const {
+ [[nodiscard]] constexpr u64 operator-(const T& oth) const {
return value - oth;
}
template <typename T>
- constexpr u64 operator&(const T& oth) const {
+ [[nodiscard]] constexpr u64 operator&(const T& oth) const {
return value & oth;
}
- constexpr u64 operator&(const Register& oth) const {
+ [[nodiscard]] constexpr u64 operator&(const Register& oth) const {
return value & oth.value;
}
- constexpr u64 operator~() const {
+ [[nodiscard]] constexpr u64 operator~() const {
return ~value;
}
- u64 GetSwizzledIndex(u64 elem) const {
+ [[nodiscard]] u64 GetSwizzledIndex(u64 elem) const {
elem = (value + elem) & 3;
return (value & ~3) + elem;
}
@@ -75,7 +75,7 @@ enum class AttributeSize : u64 {
union Attribute {
Attribute() = default;
- constexpr explicit Attribute(u64 value) : value(value) {}
+ constexpr explicit Attribute(u64 value_) : value(value_) {}
enum class Index : u64 {
LayerViewportPointSize = 6,
@@ -107,7 +107,7 @@ union Attribute {
BitField<31, 1, u64> patch;
BitField<47, 3, AttributeSize> size;
- bool IsPhysical() const {
+ [[nodiscard]] bool IsPhysical() const {
return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
}
} fmt20;
@@ -124,7 +124,7 @@ union Attribute {
union Sampler {
Sampler() = default;
- constexpr explicit Sampler(u64 value) : value(value) {}
+ constexpr explicit Sampler(u64 value_) : value(value_) {}
enum class Index : u64 {
Sampler_0 = 8,
@@ -137,7 +137,7 @@ union Sampler {
union Image {
Image() = default;
- constexpr explicit Image(u64 value) : value{value} {}
+ constexpr explicit Image(u64 value_) : value{value_} {}
BitField<36, 13, u64> index;
u64 value;
@@ -505,14 +505,14 @@ struct IpaMode {
IpaInterpMode interpolation_mode;
IpaSampleMode sampling_mode;
- bool operator==(const IpaMode& a) const {
+ [[nodiscard]] bool operator==(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) ==
std::tie(a.interpolation_mode, a.sampling_mode);
}
- bool operator!=(const IpaMode& a) const {
+ [[nodiscard]] bool operator!=(const IpaMode& a) const {
return !operator==(a);
}
- bool operator<(const IpaMode& a) const {
+ [[nodiscard]] bool operator<(const IpaMode& a) const {
return std::tie(interpolation_mode, sampling_mode) <
std::tie(a.interpolation_mode, a.sampling_mode);
}
@@ -658,10 +658,10 @@ union Instruction {
return *this;
}
- constexpr Instruction(u64 value) : value{value} {}
+ constexpr Instruction(u64 value_) : value{value_} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
- constexpr bool Bit(u64 offset) const {
+ [[nodiscard]] constexpr bool Bit(u64 offset) const {
return ((value >> offset) & 1) != 0;
}
@@ -746,34 +746,34 @@ union Instruction {
BitField<28, 8, u64> imm_lut28;
BitField<48, 8, u64> imm_lut48;
- u32 GetImmLut28() const {
+ [[nodiscard]] u32 GetImmLut28() const {
return static_cast<u32>(imm_lut28);
}
- u32 GetImmLut48() const {
+ [[nodiscard]] u32 GetImmLut48() const {
return static_cast<u32>(imm_lut48);
}
} lop3;
- u16 GetImm20_16() const {
+ [[nodiscard]] u16 GetImm20_16() const {
return static_cast<u16>(imm20_16);
}
- u32 GetImm20_19() const {
+ [[nodiscard]] u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
imm |= negate_imm ? 0x80000000 : 0;
return imm;
}
- u32 GetImm20_32() const {
+ [[nodiscard]] u32 GetImm20_32() const {
return static_cast<u32>(imm20_32);
}
- s32 GetSignedImm20_20() const {
- u32 immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
+ [[nodiscard]] s32 GetSignedImm20_20() const {
+ const auto immediate = static_cast<u32>(imm20_19 | (negate_imm << 19));
// Sign extend the 20-bit value.
- u32 mask = 1U << (20 - 1);
+ const auto mask = 1U << (20 - 1);
return static_cast<s32>((immediate ^ mask) - mask);
}
} alu;
@@ -857,7 +857,7 @@ union Instruction {
BitField<56, 1, u64> second_negate;
BitField<30, 9, u64> second;
- u32 PackImmediates() const {
+ [[nodiscard]] u32 PackImmediates() const {
// Immediates are half floats shifted.
constexpr u32 imm_shift = 6;
return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
@@ -1033,7 +1033,7 @@ union Instruction {
BitField<28, 2, AtomicType> type;
BitField<30, 22, s64> offset;
- s32 GetImmediateOffset() const {
+ [[nodiscard]] s32 GetImmediateOffset() const {
return static_cast<s32>(offset << 2);
}
} atoms;
@@ -1215,7 +1215,7 @@ union Instruction {
BitField<39, 4, u64> rounding;
// H0, H1 extract for F16 missing
BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
- F2fRoundingOp GetRoundingMode() const {
+ [[nodiscard]] F2fRoundingOp GetRoundingMode() const {
constexpr u64 rounding_mask = 0x0B;
return static_cast<F2fRoundingOp>(rounding.Value() & rounding_mask);
}
@@ -1239,15 +1239,15 @@ union Instruction {
BitField<54, 1, u64> aoffi_flag;
BitField<55, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(std::size_t component) const {
- return ((1ull << component) & component_mask) != 0;
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1271,15 +1271,15 @@ union Instruction {
BitField<36, 1, u64> aoffi_flag;
BitField<37, 3, TextureProcessMode> process_mode;
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1299,7 +1299,7 @@ union Instruction {
BitField<31, 4, u64> component_mask;
BitField<49, 1, u64> nodep_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NODEP:
return nodep_flag != 0;
@@ -1309,7 +1309,7 @@ union Instruction {
return false;
}
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
} txq;
@@ -1321,11 +1321,11 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
- bool IsComponentEnabled(std::size_t component) const {
- return ((1ull << component) & component_mask) != 0;
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return (ndv_flag != 0);
@@ -1347,7 +1347,7 @@ union Instruction {
BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@@ -1373,7 +1373,7 @@ union Instruction {
BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::NDV:
return ndv_flag != 0;
@@ -1399,7 +1399,7 @@ union Instruction {
BitField<52, 2, u64> component;
BitField<55, 1, u64> fp16_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
@@ -1422,24 +1422,27 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;
- TextureType GetTextureType() const {
+ [[nodiscard]] TextureType GetTextureType() const {
// The TEXS instruction has a weird encoding for the texture type.
- if (texture_info == 0)
+ if (texture_info == 0) {
return TextureType::Texture1D;
- if (texture_info >= 1 && texture_info <= 9)
+ }
+ if (texture_info >= 1 && texture_info <= 9) {
return TextureType::Texture2D;
- if (texture_info >= 10 && texture_info <= 11)
+ }
+ if (texture_info >= 10 && texture_info <= 11) {
return TextureType::Texture3D;
- if (texture_info >= 12 && texture_info <= 13)
+ }
+ if (texture_info >= 12 && texture_info <= 13) {
return TextureType::TextureCube;
+ }
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
- static_cast<u32>(texture_info.Value()));
+ LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
UNREACHABLE();
return TextureType::Texture1D;
}
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
switch (texture_info) {
case 0:
case 2:
@@ -1458,7 +1461,7 @@ union Instruction {
return TextureProcessMode::None;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return (texture_info >= 4 && texture_info <= 6) || texture_info == 9;
@@ -1470,16 +1473,16 @@ union Instruction {
return false;
}
- bool IsArrayTexture() const {
+ [[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info >= 7 && texture_info <= 9;
}
- bool HasTwoDestinations() const {
+ [[nodiscard]] bool HasTwoDestinations() const {
return gpr28.Value() != Register::ZeroIndex;
}
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
static constexpr std::array<std::array<u32, 8>, 4> mask_lut{{
{},
{0x1, 0x2, 0x4, 0x8, 0x3, 0x9, 0xa, 0xc},
@@ -1506,7 +1509,7 @@ union Instruction {
BitField<54, 1, u64> cl;
BitField<55, 1, u64> process_mode;
- TextureProcessMode GetTextureProcessMode() const {
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
return process_mode == 0 ? TextureProcessMode::LZ : TextureProcessMode::LL;
}
} tld;
@@ -1516,7 +1519,7 @@ union Instruction {
BitField<53, 4, u64> texture_info;
BitField<59, 1, u64> fp32_flag;
- TextureType GetTextureType() const {
+ [[nodiscard]] TextureType GetTextureType() const {
// The TLDS instruction has a weird encoding for the texture type.
if (texture_info <= 1) {
return TextureType::Texture1D;
@@ -1529,19 +1532,19 @@ union Instruction {
return TextureType::Texture3D;
}
- LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}",
- static_cast<u32>(texture_info.Value()));
+ LOG_CRITICAL(HW_GPU, "Unhandled texture_info: {}", texture_info.Value());
UNREACHABLE();
return TextureType::Texture1D;
}
- TextureProcessMode GetTextureProcessMode() const {
- if (texture_info == 1 || texture_info == 5 || texture_info == 12)
+ [[nodiscard]] TextureProcessMode GetTextureProcessMode() const {
+ if (texture_info == 1 || texture_info == 5 || texture_info == 12) {
return TextureProcessMode::LL;
+ }
return TextureProcessMode::LZ;
}
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return texture_info == 12 || texture_info == 4;
@@ -1555,7 +1558,7 @@ union Instruction {
return false;
}
- bool IsArrayTexture() const {
+ [[nodiscard]] bool IsArrayTexture() const {
// TEXS only supports Texture2D arrays.
return texture_info == 8;
}
@@ -1567,7 +1570,7 @@ union Instruction {
BitField<35, 1, u64> aoffi_flag;
BitField<49, 1, u64> nodep_flag;
- bool UsesMiscMode(TextureMiscMode mode) const {
+ [[nodiscard]] bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::AOFFI:
return aoffi_flag != 0;
@@ -1591,7 +1594,7 @@ union Instruction {
BitField<20, 3, StoreType> store_data_layout;
BitField<20, 4, u64> component_mask_selector;
- bool IsComponentEnabled(std::size_t component) const {
+ [[nodiscard]] bool IsComponentEnabled(std::size_t component) const {
ASSERT(mode == SurfaceDataMode::P);
constexpr u8 R = 0b0001;
constexpr u8 G = 0b0010;
@@ -1604,7 +1607,7 @@ union Instruction {
return std::bitset<4>{mask.at(component_mask_selector)}.test(component);
}
- StoreType GetStoreDataLayout() const {
+ [[nodiscard]] StoreType GetStoreDataLayout() const {
ASSERT(mode == SurfaceDataMode::D_BA);
return store_data_layout;
}
@@ -1622,14 +1625,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
- s32 GetBranchTarget() const {
+ [[nodiscard]] s32 GetBranchTarget() const {
// Sign extend the branch target offset
- u32 mask = 1U << (24 - 1);
- u32 value = static_cast<u32>(target);
+ const auto mask = 1U << (24 - 1);
+ const auto target_value = static_cast<u32>(target);
+ constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
+
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
- 1;
+ return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} bra;
@@ -1637,14 +1641,15 @@ union Instruction {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
- s32 GetBranchExtend() const {
+ [[nodiscard]] s32 GetBranchExtend() const {
// Sign extend the branch target offset
- u32 mask = 1U << (24 - 1);
- u32 value = static_cast<u32>(target);
+ const auto mask = 1U << (24 - 1);
+ const auto target_value = static_cast<u32>(target);
+ constexpr auto instruction_size = static_cast<s32>(sizeof(Instruction));
+
// The branch offset is relative to the next instruction and is stored in bytes, so
// divide it by the size of an instruction and add 1 to it.
- return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
- 1;
+ return static_cast<s32>((target_value ^ mask) - mask) / instruction_size + 1;
}
} brx;
@@ -1697,7 +1702,7 @@ union Instruction {
BitField<50, 1, u64> is_op_b_register;
BitField<51, 3, VmnmxOperation> operation;
- VmnmxType SourceFormatA() const {
+ [[nodiscard]] VmnmxType SourceFormatA() const {
switch (src_format_a) {
case 0b11:
return VmnmxType::Bits32;
@@ -1708,7 +1713,7 @@ union Instruction {
}
}
- VmnmxType SourceFormatB() const {
+ [[nodiscard]] VmnmxType SourceFormatB() const {
switch (src_format_b) {
case 0b11:
return VmnmxType::Bits32;
@@ -1739,7 +1744,7 @@ union Instruction {
BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;
- u64 GetOffset() const {
+ [[nodiscard]] u64 GetOffset() const {
return shifted_offset * 4;
}
} cbuf34;
@@ -1748,7 +1753,7 @@ union Instruction {
BitField<20, 16, s64> offset;
BitField<36, 5, u64> index;
- s64 GetOffset() const {
+ [[nodiscard]] s64 GetOffset() const {
return offset;
}
} cbuf36;
@@ -1893,6 +1898,7 @@ public:
ICMP_IMM,
FCMP_RR,
FCMP_RC,
+ FCMP_IMMR,
MUFU, // Multi-Function Operator
RRO_C, // Range Reduction Operator
RRO_R,
@@ -1996,29 +2002,29 @@ public:
/// Returns whether an opcode has an execution predicate field or not (ie, whether it can be
/// conditionally executed).
- static bool IsPredicatedInstruction(Id opcode) {
+ [[nodiscard]] static bool IsPredicatedInstruction(Id opcode) {
// TODO(Subv): Add the rest of unpredicated instructions.
return opcode != Id::SSY && opcode != Id::PBK;
}
class Matcher {
public:
- constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
- : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
+ constexpr Matcher(const char* const name_, u16 mask_, u16 expected_, Id id_, Type type_)
+ : name{name_}, mask{mask_}, expected{expected_}, id{id_}, type{type_} {}
- constexpr const char* GetName() const {
+ [[nodiscard]] constexpr const char* GetName() const {
return name;
}
- constexpr u16 GetMask() const {
+ [[nodiscard]] constexpr u16 GetMask() const {
return mask;
}
- constexpr Id GetId() const {
+ [[nodiscard]] constexpr Id GetId() const {
return id;
}
- constexpr Type GetType() const {
+ [[nodiscard]] constexpr Type GetType() const {
return type;
}
@@ -2027,7 +2033,7 @@ public:
* @param instruction The instruction to test
* @returns true if the given instruction matches.
*/
- constexpr bool Matches(u16 instruction) const {
+ [[nodiscard]] constexpr bool Matches(u16 instruction) const {
return (instruction & mask) == expected;
}
@@ -2039,7 +2045,8 @@ public:
Type type;
};
- static std::optional<std::reference_wrapper<const Matcher>> Decode(Instruction instr) {
+ using DecodeResult = std::optional<std::reference_wrapper<const Matcher>>;
+ [[nodiscard]] static DecodeResult Decode(Instruction instr) {
static const auto table{GetDecodeTable()};
const auto matches_instruction = [instr](const auto& matcher) {
@@ -2061,7 +2068,7 @@ private:
* A '0' in a bitstring indicates that a zero must be present at that bit position.
* A '1' in a bitstring indicates that a one must be present at that bit position.
*/
- static constexpr auto GetMaskAndExpect(const char* const bitstring) {
+ [[nodiscard]] static constexpr auto GetMaskAndExpect(const char* const bitstring) {
u16 mask = 0, expect = 0;
for (std::size_t i = 0; i < opcode_bitsize; i++) {
const std::size_t bit_position = opcode_bitsize - i - 1;
@@ -2083,14 +2090,14 @@ private:
public:
/// Creates a matcher that can match and parse instructions based on bitstring.
- static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
- const char* const name) {
+ [[nodiscard]] static constexpr auto GetMatcher(const char* const bitstring, Id op,
+ Type type, const char* const name) {
const auto [mask, expected] = GetMaskAndExpect(bitstring);
return Matcher(name, mask, expected, op, type);
}
};
- static std::vector<Matcher> GetDecodeTable() {
+ [[nodiscard]] static std::vector<Matcher> GetDecodeTable() {
std::vector<Matcher> table = {
#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
@@ -2205,6 +2212,7 @@ private:
INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
+ INST("0011011-1010----", Id::FCMP_IMMR, Type::Arithmetic, "FCMP_IMMR"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index 72e2a33d5..ceec05459 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -41,30 +41,30 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
- } common0{};
+ } common0;
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
- } common1{};
+ } common1;
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
- } common2{};
+ } common2;
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
- } common3{};
+ } common3;
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<20, 4, u32> reserved;
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4{};
+ } common4;
union {
struct {
@@ -145,7 +145,7 @@ struct Header {
}
} ps;
- std::array<u32, 0xF> raw{};
+ std::array<u32, 0xF> raw;
};
u64 GetLocalMemorySize() const {
@@ -153,7 +153,6 @@ struct Header {
(common2.shader_local_memory_high_size << 24));
}
};
-
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
} // namespace Tegra::Shader