summaryrefslogtreecommitdiffstats
path: root/src/video_core/engines
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/engines')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp122
-rw-r--r--src/video_core/engines/maxwell_3d.h56
-rw-r--r--src/video_core/engines/shader_bytecode.h4
3 files changed, 122 insertions, 60 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..842cdcbcf 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -4,17 +4,21 @@
#include <cinttypes>
#include <cstring>
+#include <optional>
#include "common/assert.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"
namespace Tegra::Engines {
+using VideoCore::QueryType;
+
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
@@ -399,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessQueryCondition();
break;
}
+ case MAXWELL3D_REG_INDEX(counter_reset): {
+ ProcessCounterReset();
+ break;
+ }
case MAXWELL3D_REG_INDEX(sync_info): {
ProcessSyncPoint();
break;
@@ -519,61 +527,51 @@ void Maxwell3D::ProcessFirmwareCall4() {
regs.reg_array[0xd00] = 1;
}
-void Maxwell3D::ProcessQueryGet() {
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
+ struct LongQueryResult {
+ u64_le value;
+ u64_le timestamp;
+ };
+ static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
const GPUVAddr sequence_address{regs.query.QueryAddress()};
- // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
- // VAddr before writing.
+ if (long_query) {
+ // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
+ // GPU, this command may actually take a while to complete in real hardware due to GPU
+ // wait queues.
+ LongQueryResult query_result{payload, system.GPU().GetTicks()};
+ memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ } else {
+ memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+ }
+}
+void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u64 result = 0;
-
- // TODO(Subv): Support the other query variables
- switch (regs.query.query_get.select) {
- case Regs::QuerySelect::Zero:
- // This seems to actually write the query sequence to the query address.
- result = regs.query.query_sequence;
+ switch (regs.query.query_get.operation) {
+ case Regs::QueryOperation::Release:
+ StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
break;
- default:
- result = 1;
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
- }
-
- // TODO(Subv): Research and implement how query sync conditions work.
-
- struct LongQueryResult {
- u64_le value;
- u64_le timestamp;
- };
- static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
-
- switch (regs.query.query_get.mode) {
- case Regs::QueryMode::Write:
- case Regs::QueryMode::Write2: {
- u32 sequence = regs.query.query_sequence;
- if (regs.query.query_get.short_query) {
- // Write the current query sequence to the sequence address.
- // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
- // query.
- memory_manager.Write<u32>(sequence_address, sequence);
- } else {
- // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
- // GPU, this command may actually take a while to complete in real hardware due to GPU
- // wait queues.
- LongQueryResult query_result{};
- query_result.value = result;
- // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
- query_result.timestamp = system.CoreTiming().GetTicks();
- memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ case Regs::QueryOperation::Acquire:
+ // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
+ // matches the current payload.
+ UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+ break;
+ case Regs::QueryOperation::Counter:
+ if (const std::optional<u64> result = GetQueryResult()) {
+ // If the query returns an empty optional it means it's cached and deferred.
+ // In this case we have a non-empty result, so we stamp it immediately.
+ StampQueryResult(*result, regs.query.query_get.short_query == 0);
}
break;
- }
+ case Regs::QueryOperation::Trap:
+ UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+ break;
default:
- UNIMPLEMENTED_MSG("Query mode {} not implemented",
- static_cast<u32>(regs.query.query_get.mode.Value()));
+ UNIMPLEMENTED_MSG("Unknown query operation");
+ break;
}
}
@@ -590,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
}
case Regs::ConditionMode::ResNonZero: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
break;
}
case Regs::ConditionMode::Equal: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on =
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
break;
}
case Regs::ConditionMode::NotEqual: {
Regs::QueryCompare cmp;
- memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
+ memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
execute_on =
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
break;
@@ -616,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
}
}
+void Maxwell3D::ProcessCounterReset() {
+ switch (regs.counter_reset) {
+ case Regs::CounterReset::SampleCnt:
+ rasterizer.ResetCounter(QueryType::SamplesPassed);
+ break;
+ default:
+ LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
+ static_cast<int>(regs.counter_reset));
+ break;
+ }
+}
+
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
@@ -658,6 +668,22 @@ void Maxwell3D::DrawArrays() {
}
}
+std::optional<u64> Maxwell3D::GetQueryResult() {
+ switch (regs.query.query_get.select) {
+ case Regs::QuerySelect::Zero:
+ return 0;
+ case Regs::QuerySelect::SamplesPassed:
+ // Deferred.
+ rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ system.GPU().GetTicks());
+ return {};
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
+ return 1;
+ }
+}
+
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
auto& shader = state.shader_stages[stage_index];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7b1912a66..26939be3f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -6,6 +6,7 @@
#include <array>
#include <bitset>
+#include <optional>
#include <type_traits>
#include <unordered_map>
#include <vector>
@@ -71,12 +72,11 @@ public:
static constexpr std::size_t MaxConstBuffers = 18;
static constexpr std::size_t MaxConstBufferSize = 0x10000;
- enum class QueryMode : u32 {
- Write = 0,
- Sync = 1,
- // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
- // is.
- Write2 = 2,
+ enum class QueryOperation : u32 {
+ Release = 0,
+ Acquire = 1,
+ Counter = 2,
+ Trap = 3,
};
enum class QueryUnit : u32 {
@@ -410,6 +410,27 @@ public:
Linear = 1,
};
+ enum class CounterReset : u32 {
+ SampleCnt = 0x01,
+ Unk02 = 0x02,
+ Unk03 = 0x03,
+ Unk04 = 0x04,
+ EmittedPrimitives = 0x10, // Not tested
+ Unk11 = 0x11,
+ Unk12 = 0x12,
+ Unk13 = 0x13,
+ Unk15 = 0x15,
+ Unk16 = 0x16,
+ Unk17 = 0x17,
+ Unk18 = 0x18,
+ Unk1A = 0x1A,
+ Unk1B = 0x1B,
+ Unk1C = 0x1C,
+ Unk1D = 0x1D,
+ Unk1E = 0x1E,
+ GeneratedPrimitives = 0x1F,
+ };
+
struct Cull {
enum class FrontFace : u32 {
ClockWise = 0x0900,
@@ -858,7 +879,7 @@ public:
BitField<7, 1, u32> c7;
} clip_distance_enabled;
- INSERT_UNION_PADDING_WORDS(0x1);
+ u32 samplecnt_enable;
float point_size;
@@ -866,7 +887,11 @@ public:
u32 point_sprite_enable;
- INSERT_UNION_PADDING_WORDS(0x5);
+ INSERT_UNION_PADDING_WORDS(0x3);
+
+ CounterReset counter_reset;
+
+ INSERT_UNION_PADDING_WORDS(0x1);
u32 zeta_enable;
@@ -1081,7 +1106,7 @@ public:
u32 query_sequence;
union {
u32 raw;
- BitField<0, 2, QueryMode> mode;
+ BitField<0, 2, QueryOperation> operation;
BitField<4, 1, u32> fence;
BitField<12, 4, QueryUnit> unit;
BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1413,9 +1438,15 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
- // Handles Conditional Rendering
+ /// Writes the query result accordingly.
+ void StampQueryResult(u64 payload, bool long_query);
+
+ /// Handles conditional rendering.
void ProcessQueryCondition();
+ /// Handles counter resets.
+ void ProcessCounterReset();
+
/// Handles writes to syncing register.
void ProcessSyncPoint();
@@ -1432,6 +1463,9 @@ private:
// Handles a instance drawcall from MME
void StepInstance(MMEDrawMode expected_mode, u32 count);
+
+ /// Returns a query's value or an empty object if the value will be deferred through a cache.
+ std::optional<u64> GetQueryResult();
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -1497,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
+ASSERT_REG_POSITION(samplecnt_enable, 0x545);
ASSERT_REG_POSITION(point_size, 0x546);
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
+ASSERT_REG_POSITION(counter_reset, 0x54C);
ASSERT_REG_POSITION(zeta_enable, 0x54E);
ASSERT_REG_POSITION(multisample_control, 0x54F);
ASSERT_REG_POSITION(condition, 0x554);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 402869fde..c9bc83cd7 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1677,11 +1677,11 @@ union Instruction {
} xmad;
union {
- BitField<20, 14, u64> offset;
+ BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;
u64 GetOffset() const {
- return offset * 4;
+ return shifted_offset * 4;
}
} cbuf34;