summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 10
-rw-r--r-- src/video_core/dma_pusher.cpp 34
-rw-r--r-- src/video_core/dma_pusher.h 5
-rw-r--r-- src/video_core/engines/engine_interface.h 8
-rw-r--r-- src/video_core/engines/engine_upload.h 8
-rw-r--r-- src/video_core/engines/kepler_compute.cpp 20
-rw-r--r-- src/video_core/engines/kepler_compute.h 17
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 6
-rw-r--r-- src/video_core/engines/puller.cpp 15
-rw-r--r-- src/video_core/host1x/codecs/codec.cpp 3
-rw-r--r-- src/video_core/macro/macro.cpp 24
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 11
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.cpp 18
-rw-r--r-- src/video_core/renderer_vulkan/renderer_vulkan.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp 11
-rw-r--r-- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp 20
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 14
-rw-r--r-- src/video_core/vulkan_common/vulkan_debug_callback.cpp 27
-rw-r--r-- src/video_core/vulkan_common/vulkan_debug_callback.h 2
-rw-r--r-- src/video_core/vulkan_common/vulkan_instance.cpp 8
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.cpp 1
-rw-r--r-- src/video_core/vulkan_common/vulkan_wrapper.h 5
22 files changed, 188 insertions, 83 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f0f450edb..8be7bd594 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -289,8 +289,11 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad
MarkWrittenBuffer(buffer_id, *cpu_addr, size);
break;
case ObtainBufferOperation::DiscardWrite: {
- IntervalType interval{*cpu_addr, size};
+ VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64);
+ VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64);
+ IntervalType interval{cpu_addr_start, cpu_addr_end};
ClearDownload(interval);
+ common_ranges.subtract(interval);
break;
}
default:
@@ -1159,6 +1162,11 @@ void BufferCache<P>::UpdateDrawIndirect() {
.size = static_cast<u32>(size),
.buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)),
};
+ VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64);
+ VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64);
+ IntervalType interval{cpu_addr_start, cpu_addr_end};
+ ClearDownload(interval);
+ common_ranges.subtract(interval);
};
if (current_draw_indirect->include_count) {
update(current_draw_indirect->count_start_address, sizeof(u32),
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 9f1b340a9..58ce0d8c2 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -14,6 +14,7 @@
namespace Tegra {
constexpr u32 MacroRegistersStart = 0xE00;
+constexpr u32 ComputeInline = 0x6D;
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
Control::ChannelState& channel_state_)
@@ -83,12 +84,35 @@ bool DmaPusher::Step() {
dma_state.dma_get, command_list_header.size * sizeof(u32));
}
}
- Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
- Core::Memory::GuestMemoryFlags::UnsafeRead>
- headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers);
- ProcessCommands(headers);
+ const auto safe_process = [&] {
+ Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
+ Core::Memory::GuestMemoryFlags::SafeRead>
+ headers(memory_manager, dma_state.dma_get, command_list_header.size,
+ &command_headers);
+ ProcessCommands(headers);
+ };
+ const auto unsafe_process = [&] {
+ Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
+ Core::Memory::GuestMemoryFlags::UnsafeRead>
+ headers(memory_manager, dma_state.dma_get, command_list_header.size,
+ &command_headers);
+ ProcessCommands(headers);
+ };
+ if (Settings::IsGPULevelHigh()) {
+ if (dma_state.method >= MacroRegistersStart) {
+ unsafe_process();
+ return true;
+ }
+ if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute &&
+ dma_state.method == ComputeInline) {
+ unsafe_process();
+ return true;
+ }
+ safe_process();
+ return true;
+ }
+ unsafe_process();
}
-
return true;
}
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 8a2784cdc..c9fab2d90 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -130,8 +130,10 @@ public:
void DispatchCalls();
- void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) {
+ void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id,
+ Engines::EngineTypes engine_type) {
subchannels[subchannel_id] = engine;
+ subchannel_type[subchannel_id] = engine_type;
}
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
@@ -170,6 +172,7 @@ private:
const bool ib_enable{true}; ///< IB mode enabled
std::array<Engines::EngineInterface*, max_subchannels> subchannels{};
+ std::array<Engines::EngineTypes, max_subchannels> subchannel_type;
GPU& gpu;
Core::System& system;
diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h
index 392322358..54631ee6c 100644
--- a/src/video_core/engines/engine_interface.h
+++ b/src/video_core/engines/engine_interface.h
@@ -11,6 +11,14 @@
namespace Tegra::Engines {
+enum class EngineTypes : u32 {
+ KeplerCompute,
+ Maxwell3D,
+ Fermi2D,
+ MaxwellDMA,
+ KeplerMemory,
+};
+
class EngineInterface {
public:
virtual ~EngineInterface() = default;
diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h
index 7242d2529..21bf8aeb4 100644
--- a/src/video_core/engines/engine_upload.h
+++ b/src/video_core/engines/engine_upload.h
@@ -69,6 +69,14 @@ public:
/// Binds a rasterizer to this engine.
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+ GPUVAddr ExecTargetAddress() const {
+ return regs.dest.Address();
+ }
+
+ u32 GetUploadSize() const {
+ return copy_size;
+ }
+
private:
void ProcessData(std::span<const u8> read_buffer);
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a38d9528a..cd61ab222 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal
switch (method) {
case KEPLER_COMPUTE_REG_INDEX(exec_upload): {
+ UploadInfo info{.upload_address = upload_address,
+ .exec_address = upload_state.ExecTargetAddress(),
+ .copy_size = upload_state.GetUploadSize()};
+ uploads.push_back(info);
upload_state.ProcessExec(regs.exec_upload.linear != 0);
break;
}
case KEPLER_COMPUTE_REG_INDEX(data_upload): {
+ upload_address = current_dma_segment;
upload_state.ProcessData(method_argument, is_last_call);
break;
}
- case KEPLER_COMPUTE_REG_INDEX(launch):
+ case KEPLER_COMPUTE_REG_INDEX(launch): {
+ const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
+
+ for (auto& data : uploads) {
+ const GPUVAddr offset = data.exec_address - launch_desc_loc;
+ if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) &&
+ memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) {
+ indirect_compute = {data.upload_address};
+ }
+ }
+ uploads.clear();
ProcessLaunch();
+ indirect_compute = std::nullopt;
break;
+ }
default:
break;
}
@@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
u32 methods_pending) {
switch (method) {
case KEPLER_COMPUTE_REG_INDEX(data_upload):
+ upload_address = current_dma_segment;
upload_state.ProcessData(base_start, amount);
return;
default:
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 2092e685f..735e05fb4 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -5,6 +5,7 @@
#include <array>
#include <cstddef>
+#include <optional>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
@@ -36,6 +37,9 @@ namespace Tegra::Engines {
#define KEPLER_COMPUTE_REG_INDEX(field_name) \
(offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
+#define LAUNCH_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32))
+
class KeplerCompute final : public EngineInterface {
public:
explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
@@ -201,6 +205,10 @@ public:
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
u32 methods_pending) override;
+ std::optional<GPUVAddr> GetIndirectComputeAddress() const {
+ return indirect_compute;
+ }
+
private:
void ProcessLaunch();
@@ -216,6 +224,15 @@ private:
MemoryManager& memory_manager;
VideoCore::RasterizerInterface* rasterizer = nullptr;
Upload::State upload_state;
+ GPUVAddr upload_address;
+
+ struct UploadInfo {
+ GPUVAddr upload_address;
+ GPUVAddr exec_address;
+ u32 copy_size;
+ };
+ std::vector<UploadInfo> uploads;
+ std::optional<GPUVAddr> indirect_compute{};
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c3696096d..06e349e43 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -257,6 +257,7 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
num_vertices = std::max(
num_vertices, address_size / std::max(attribute.SizeInBytes(), array.stride.Value()));
+ break;
}
return num_vertices;
}
@@ -269,10 +270,13 @@ size_t Maxwell3D::EstimateIndexBufferSize() {
std::numeric_limits<u32>::max()};
const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
const size_t log2_byte_size = Common::Log2Ceil64(byte_size);
+ const size_t cap{GetMaxCurrentVertices() * 3 * byte_size};
+ const size_t lower_cap =
+ std::min<size_t>(static_cast<size_t>(end_address - start_address), cap);
return std::min<size_t>(
memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[log2_byte_size]) /
byte_size,
- static_cast<size_t>(end_address - start_address));
+ lower_cap);
}
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 7718a09b3..6de2543b7 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
bound_engines[method_call.subchannel] = engine_id;
switch (engine_id) {
case EngineID::FERMI_TWOD_A:
- dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
+ dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel,
+ EngineTypes::Fermi2D);
break;
case EngineID::MAXWELL_B:
- dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
+ dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel,
+ EngineTypes::Maxwell3D);
break;
case EngineID::KEPLER_COMPUTE_B:
- dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
+ dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel,
+ EngineTypes::KeplerCompute);
break;
case EngineID::MAXWELL_DMA_COPY_A:
- dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
+ dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel,
+ EngineTypes::MaxwellDMA);
break;
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
+ dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel,
+ EngineTypes::KeplerMemory);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
diff --git a/src/video_core/host1x/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp
index 220cce28a..8d7da50fc 100644
--- a/src/video_core/host1x/codecs/codec.cpp
+++ b/src/video_core/host1x/codecs/codec.cpp
@@ -319,6 +319,7 @@ void Codec::Decode() {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
return;
}
+ bool is_interlaced = initial_frame->interlaced_frame != 0;
if (av_codec_ctx->hw_device_ctx) {
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter};
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed");
@@ -334,7 +335,7 @@ void Codec::Decode() {
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format);
return;
}
- if (!final_frame->interlaced_frame) {
+ if (!is_interlaced) {
av_frames.push(std::move(final_frame));
} else {
if (!filters_initialized) {
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index 905505ca1..5d0bb9cc4 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -27,14 +27,24 @@ MICROPROFILE_DEFINE(MacroHLE, "GPU", "Execute macro HLE", MP_RGB(128, 192, 192))
namespace Tegra {
-static void Dump(u64 hash, std::span<const u32> code) {
+static void Dump(u64 hash, std::span<const u32> code, bool decompiled = false) {
const auto base_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::DumpDir)};
const auto macro_dir{base_dir / "macros"};
if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) {
LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories");
return;
}
- const auto name{macro_dir / fmt::format("{:016x}.macro", hash)};
+ auto name{macro_dir / fmt::format("{:016x}.macro", hash)};
+
+ if (decompiled) {
+ auto new_name{macro_dir / fmt::format("decompiled_{:016x}.macro", hash)};
+ if (Common::FS::Exists(name)) {
+ (void)Common::FS::RenameFile(name, new_name);
+ return;
+ }
+ name = new_name;
+ }
+
std::fstream macro_file(name, std::ios::out | std::ios::binary);
if (!macro_file) {
LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}",
@@ -90,9 +100,6 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
if (!mid_method.has_value()) {
cache_info.lle_program = Compile(macro_code->second);
cache_info.hash = Common::HashValue(macro_code->second);
- if (Settings::values.dump_macros) {
- Dump(cache_info.hash, macro_code->second);
- }
} else {
const auto& macro_cached = uploaded_macro_code[mid_method.value()];
const auto rebased_method = method - mid_method.value();
@@ -102,9 +109,6 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
code.size() * sizeof(u32));
cache_info.hash = Common::HashValue(code);
cache_info.lle_program = Compile(code);
- if (Settings::values.dump_macros) {
- Dump(cache_info.hash, code);
- }
}
auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
@@ -117,6 +121,10 @@ void MacroEngine::Execute(u32 method, const std::vector<u32>& parameters) {
MICROPROFILE_SCOPE(MacroHLE);
cache_info.hle_program->Execute(parameters, method);
}
+
+ if (Settings::values.dump_macros) {
+ Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program);
+ }
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 1ba31be88..dd03efecd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() {
pipeline->SetEngine(kepler_compute, gpu_memory);
pipeline->Configure();
const auto& qmd{kepler_compute->launch_description};
+ auto indirect_address = kepler_compute->GetIndirectComputeAddress();
+ if (indirect_address) {
+ // DispatchIndirect
+ static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
+ const auto [buffer, offset] =
+ buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
+ glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle());
+ glDispatchComputeIndirect(static_cast<GLintptr>(offset));
+ return;
+ }
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 454bb66a4..c4c30d807 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -66,21 +66,6 @@ std::string BuildCommaSeparatedExtensions(
return fmt::format("{}", fmt::join(available_extensions, ","));
}
-DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) {
- if (!Settings::values.renderer_debug) {
- return DebugCallback{};
- }
- const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
- const auto it = std::ranges::find_if(*properties, [](const auto& prop) {
- return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0;
- });
- if (it != properties->end()) {
- return CreateDebugUtilsCallback(instance);
- } else {
- return CreateDebugReportCallback(instance);
- }
-}
-
} // Anonymous namespace
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
@@ -103,7 +88,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
Settings::values.renderer_debug.GetValue())),
- debug_callback(MakeDebugCallback(instance, dld)),
+ debug_messenger(Settings::values.renderer_debug ? CreateDebugUtilsCallback(instance)
+ : vk::DebugUtilsMessenger{}),
surface(CreateSurface(instance, render_window.GetWindowInfo())),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
scheduler(device, state_tracker),
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 89e98425e..590bc1c64 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -35,8 +35,6 @@ class GPU;
namespace Vulkan {
-using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>;
-
Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
VkSurfaceKHR surface);
@@ -75,7 +73,7 @@ private:
vk::InstanceDispatch dld;
vk::Instance instance;
- DebugCallback debug_callback;
+ vk::DebugUtilsMessenger debug_messenger;
vk::SurfaceKHR surface;
ScreenInfo screen_info;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 60a6ac651..e15865d16 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -529,17 +529,20 @@ void BufferCacheRuntime::BindVertexBuffers(VideoCommon::HostBindings<Buffer>& bi
buffer_handles.push_back(handle);
}
if (device.IsExtExtendedDynamicStateSupported()) {
- scheduler.Record([bindings_ = std::move(bindings),
+ scheduler.Record([this, bindings_ = std::move(bindings),
buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
cmdbuf.BindVertexBuffers2EXT(bindings_.min_index,
- bindings_.max_index - bindings_.min_index,
+ std::min(bindings_.max_index - bindings_.min_index,
+ device.GetMaxVertexInputBindings()),
buffer_handles_.data(), bindings_.offsets.data(),
bindings_.sizes.data(), bindings_.strides.data());
});
} else {
- scheduler.Record([bindings_ = std::move(bindings),
+ scheduler.Record([this, bindings_ = std::move(bindings),
buffer_handles_ = std::move(buffer_handles)](vk::CommandBuffer cmdbuf) {
- cmdbuf.BindVertexBuffers(bindings_.min_index, bindings_.max_index - bindings_.min_index,
+ cmdbuf.BindVertexBuffers(bindings_.min_index,
+ std::min(bindings_.max_index - bindings_.min_index,
+ device.GetMaxVertexInputBindings()),
buffer_handles_.data(), bindings_.offsets.data());
});
}
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index c1314ca99..4f83a88e1 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -611,9 +611,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
- if (Settings::values.dump_shaders) {
- env.Dump(hash, key.unique_hashes[index]);
- }
if (!uses_vertex_a || index != 1) {
// Normal path
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info);
@@ -624,6 +621,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
programs[index] = MergeDualVertexPrograms(program_va, program_vb, env);
}
+ if (Settings::values.dump_shaders) {
+ env.Dump(hash, key.unique_hashes[index]);
+ }
+
if (programs[index].info.requires_layer_emulation) {
layer_source_program = &programs[index];
}
@@ -664,6 +665,19 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
std::move(modules), infos);
} catch (const Shader::Exception& exception) {
+ auto hash = key.Hash();
+ size_t env_index{0};
+ for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
+ if (key.unique_hashes[index] == 0) {
+ continue;
+ }
+ Shader::Environment& env{*envs[env_index]};
+ ++env_index;
+
+ const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
+ Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0);
+ env.Dump(hash, key.unique_hashes[index]);
+ }
LOG_ERROR(Render_Vulkan, "{}", exception.what());
return nullptr;
}
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 032f694bc..01e76a82c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -463,6 +463,20 @@ void RasterizerVulkan::DispatchCompute() {
pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
const auto& qmd{kepler_compute->launch_description};
+ auto indirect_address = kepler_compute->GetIndirectComputeAddress();
+ if (indirect_address) {
+ // DispatchIndirect
+ static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
+ const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
+ const auto [buffer, offset] =
+ buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([indirect_buffer = buffer->Handle(),
+ indirect_offset = offset](vk::CommandBuffer cmdbuf) {
+ cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset);
+ });
+ return;
+ }
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
index 67e8065a4..448df2d3a 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -63,22 +63,6 @@ VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
return VK_FALSE;
}
-VkBool32 DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType,
- uint64_t object, size_t location, int32_t messageCode,
- const char* pLayerPrefix, const char* pMessage, void* pUserData) {
- const VkDebugReportFlagBitsEXT severity = static_cast<VkDebugReportFlagBitsEXT>(flags);
- const std::string_view message{pMessage};
- if (severity & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
- LOG_CRITICAL(Render_Vulkan, "{}", message);
- } else if (severity & VK_DEBUG_REPORT_WARNING_BIT_EXT) {
- LOG_WARNING(Render_Vulkan, "{}", message);
- } else if (severity & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
- LOG_INFO(Render_Vulkan, "{}", message);
- } else if (severity & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
- LOG_DEBUG(Render_Vulkan, "{}", message);
- }
- return VK_FALSE;
-}
} // Anonymous namespace
vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) {
@@ -98,15 +82,4 @@ vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) {
});
}
-vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance) {
- return instance.CreateDebugReportCallback({
- .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
- .pNext = nullptr,
- .flags = VK_DEBUG_REPORT_DEBUG_BIT_EXT | VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
- VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT,
- .pfnCallback = DebugReportCallback,
- .pUserData = nullptr,
- });
-}
-
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
index a8af7b406..5e940782f 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.h
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -9,6 +9,4 @@ namespace Vulkan {
vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance);
-vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance);
-
} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index bc16145be..180657a75 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -76,11 +76,9 @@ namespace {
extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
}
#endif
- if (enable_validation) {
- const bool debug_utils =
- AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME});
- extensions.push_back(debug_utils ? VK_EXT_DEBUG_UTILS_EXTENSION_NAME
- : VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
+ if (enable_validation &&
+ AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME})) {
+ extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
return extensions;
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 78e5a248f..c3f388d89 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -92,6 +92,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdCopyImage);
X(vkCmdCopyImageToBuffer);
X(vkCmdDispatch);
+ X(vkCmdDispatchIndirect);
X(vkCmdDraw);
X(vkCmdDrawIndexed);
X(vkCmdDrawIndirect);
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index c226a2a29..049fa8038 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -203,6 +203,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdCopyImage vkCmdCopyImage{};
PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{};
PFN_vkCmdDispatch vkCmdDispatch{};
+ PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{};
PFN_vkCmdDraw vkCmdDraw{};
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
PFN_vkCmdDrawIndirect vkCmdDrawIndirect{};
@@ -1209,6 +1210,10 @@ public:
dld->vkCmdDispatch(handle, x, y, z);
}
+ void DispatchIndirect(VkBuffer indirect_buffer, VkDeviceSize offset) const noexcept {
+ dld->vkCmdDispatchIndirect(handle, indirect_buffer, offset);
+ }
+
void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers,
Span<VkBufferMemoryBarrier> buffer_barriers,