// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <algorithm>
#include <cstring>
#include <limits>
#include <mutex>

#include <boost/container/static_vector.hpp>

#include "video_core/renderer_vulkan/renderer_vulkan.h"

#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"

namespace Vulkan {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using MaxwellDrawState = Tegra::Engines::DrawManager::State;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;

MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));

namespace {

struct DrawParams {
    u32 base_instance;
    u32 num_instances;
    u32 base_vertex;
    u32 num_vertices;
    u32 first_index;
    bool is_indexed;
};

VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
    const auto& src = regs.viewport_transform[index];
    const auto conv = [scale](float value) {
        float new_value = value * scale;
        if (scale < 1.0f) {
            const bool sign = std::signbit(value);
            new_value = std::round(std::abs(new_value));
            new_value = sign ? -new_value : new_value;
        }
        return new_value;
    };
    const float x = conv(src.translate_x - src.scale_x);
    const float width = conv(src.scale_x * 2.0f);
    float y = conv(src.translate_y - src.scale_y);
    float height = conv(src.scale_y * 2.0f);
    bool y_negate = regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft;
    if (!device.IsNvViewportSwizzleSupported()) {
        y_negate = y_negate != (src.swizzle.y == Maxwell::ViewportSwizzle::NegativeY);
    }
    if (y_negate) {
        y += height;
        height = -height;
    }
    const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
    VkViewport viewport{
        .x = x,
        .y = y,
        .width = width != 0.0f ? width : 1.0f,
        .height = height != 0.0f ? height : 1.0f,
        .minDepth = src.translate_z - src.scale_z * reduce_z,
        .maxDepth = src.translate_z + src.scale_z,
    };
    if (!device.IsExtDepthRangeUnrestrictedSupported()) {
        viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
        viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
    }
    return viewport;
}

VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) {
    const auto& src = regs.scissor_test[index];
    VkRect2D scissor;
    const auto scale_up = [&](s32 value) -> s32 {
        if (value == 0) {
            return 0U;
        }
        const s32 upset = value * up_scale;
        s32 acumm = 0;
        if ((up_scale >> down_shift) == 0) {
            acumm = upset % 2;
        }
        const s32 converted_value = (value * up_scale) >> down_shift;
        return value < 0 ? std::min(converted_value - acumm, -1)
                         : std::max(converted_value + acumm, 1);
    };
    if (src.enable) {
        scissor.offset.x = scale_up(static_cast<s32>(src.min_x));
        scissor.offset.y = scale_up(static_cast<s32>(src.min_y));
        scissor.extent.width = scale_up(src.max_x - src.min_x);
        scissor.extent.height = scale_up(src.max_y - src.min_y);
    } else {
        scissor.offset.x = 0;
        scissor.offset.y = 0;
        scissor.extent.width = std::numeric_limits<s32>::max();
        scissor.extent.height = std::numeric_limits<s32>::max();
    }
    return scissor;
}

DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {
    DrawParams params{
        .base_instance = draw_state.base_instance,
        .num_instances = num_instances,
        .base_vertex = is_indexed ? draw_state.base_index : draw_state.vertex_buffer.first,
        .num_vertices = is_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count,
        .first_index = is_indexed ? draw_state.index_buffer.first : 0,
        .is_indexed = is_indexed,
    };
    // 6 triangle vertices per quad, base vertex is part of the index
    // See BindQuadIndexBuffer for more details
    if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
        params.num_vertices = (params.num_vertices / 4) * 6;
        params.base_vertex = 0;
        params.is_indexed = true;
    } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
        params.num_vertices = (params.num_vertices - 2) / 2 * 6;
        params.base_vertex = 0;
        params.is_indexed = true;
    }
    return params;
}
} // Anonymous namespace

RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                   Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
                                   const Device& device_, MemoryAllocator& memory_allocator_,
                                   StateTracker& state_tracker_, Scheduler& scheduler_)
    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
      memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
      staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
      guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
      blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
      texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image,
                            render_pass_cache, descriptor_pool, compute_pass_descriptor_queue},
      texture_cache(texture_cache_runtime, *this),
      buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                           guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
      pipeline_cache(*this, device, scheduler, descriptor_pool, guest_descriptor_queue,
                     render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
      query_cache{*this, cpu_memory_, device, scheduler},
      accelerate_dma(buffer_cache, texture_cache, scheduler),
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
      wfi_event(device.GetLogical().CreateEvent()) {
    scheduler.SetQueryCache(query_cache);
}

RasterizerVulkan::~RasterizerVulkan() = default;

template <typename Func>
void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
    MICROPROFILE_SCOPE(Vulkan_Drawing);

    SCOPE_EXIT({ gpu.TickWork(); });
    FlushWork();
    gpu_memory->FlushCaching();
#if ANDROID
    if (Settings::IsGPULevelHigh()) {
        // This is problematic on Android, disable on GPU Normal.
        query_cache.UpdateCounters();
    }
#else
    query_cache.UpdateCounters();
#endif

    GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
    if (!pipeline) {
        return;
    }
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    // update engine as channel may be different.
    pipeline->SetEngine(maxwell3d, gpu_memory);
    pipeline->Configure(is_indexed);

    BeginTransformFeedback();

    UpdateDynamicStates();

    draw_func();

    EndTransformFeedback();
}

void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
    PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
        const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
        const u32 num_instances{instance_count};
        const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
        scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
            if (draw_params.is_indexed) {
                cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
                                   draw_params.first_index, draw_params.base_vertex,
                                   draw_params.base_instance);
            } else {
                cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
                            draw_params.base_vertex, draw_params.base_instance);
            }
        });
    });
}

void RasterizerVulkan::DrawIndirect() {
    const auto& params = maxwell3d->draw_manager->GetIndirectParams();
    buffer_cache.SetDrawIndirect(&params);
    PrepareDraw(params.is_indexed, [this, &params] {
        const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
        const auto& buffer = indirect_buffer.first;
        const auto& offset = indirect_buffer.second;
        if (params.include_count) {
            const auto count = buffer_cache.GetDrawIndirectCount();
            const auto& draw_buffer = count.first;
            const auto& offset_base = count.second;
            scheduler.Record([draw_buffer_obj = draw_buffer->Handle(),
                              buffer_obj = buffer->Handle(), offset_base, offset,
                              params](vk::CommandBuffer cmdbuf) {
                if (params.is_indexed) {
                    cmdbuf.DrawIndexedIndirectCount(buffer_obj, offset, draw_buffer_obj,
                                                    offset_base,
                                                    static_cast<u32>(params.max_draw_counts),
                                                    static_cast<u32>(params.stride));
                } else {
                    cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base,
                                             static_cast<u32>(params.max_draw_counts),
                                             static_cast<u32>(params.stride));
                }
            });
            return;
        }
        scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
            if (params.is_indexed) {
                cmdbuf.DrawIndexedIndirect(buffer_obj, offset,
                                           static_cast<u32>(params.max_draw_counts),
                                           static_cast<u32>(params.stride));
            } else {
                cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts),
                                    static_cast<u32>(params.stride));
            }
        });
    });
    buffer_cache.SetDrawIndirect(nullptr);
}

void RasterizerVulkan::DrawTexture() {
    MICROPROFILE_SCOPE(Vulkan_Drawing);

    SCOPE_EXIT({ gpu.TickWork(); });
    FlushWork();

#if ANDROID
    if (Settings::IsGPULevelHigh()) {
        // This is problematic on Android, disable on GPU Normal.
        query_cache.UpdateCounters();
    }
#else
    query_cache.UpdateCounters();
#endif

    texture_cache.SynchronizeGraphicsDescriptors();
    texture_cache.UpdateRenderTargets(false);

    UpdateDynamicStates();

    const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
    const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
    const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);

    Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
                                    .y = static_cast<s32>(draw_texture_state.dst_y0)},
                           Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
                                    .y = static_cast<s32>(draw_texture_state.dst_y1)}};
    Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
                                    .y = static_cast<s32>(draw_texture_state.src_y0)},
                           Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
                                    .y = static_cast<s32>(draw_texture_state.src_y1)}};
    blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(),
                         texture.ImageHandle(), sampler->Handle(), dst_region, src_region,
                         texture.size);
}

void RasterizerVulkan::Clear(u32 layer_count) {
    MICROPROFILE_SCOPE(Vulkan_Clearing);

    FlushWork();
    gpu_memory->FlushCaching();

#if ANDROID
    if (Settings::IsGPULevelHigh()) {
        // This is problematic on Android, disable on GPU Normal.
        query_cache.UpdateCounters();
    }
#else
    query_cache.UpdateCounters();
#endif

    auto& regs = maxwell3d->regs;
    const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
                           regs.clear_surface.A;
    const bool use_depth = regs.clear_surface.Z;
    const bool use_stencil = regs.clear_surface.S;
    if (!use_color && !use_depth && !use_stencil) {
        return;
    }

    std::scoped_lock lock{texture_cache.mutex};
    texture_cache.UpdateRenderTargets(true);
    const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
    const VkExtent2D render_area = framebuffer->RenderArea();
    scheduler.RequestRenderpass(framebuffer);

    u32 up_scale = 1;
    u32 down_shift = 0;
    if (texture_cache.IsRescaling()) {
        up_scale = Settings::values.resolution_info.up_scale;
        down_shift = Settings::values.resolution_info.down_shift;
    }
    UpdateViewportsState(regs);

    VkRect2D default_scissor;
    default_scissor.offset.x = 0;
    default_scissor.offset.y = 0;
    default_scissor.extent.width = std::numeric_limits<s32>::max();
    default_scissor.extent.height = std::numeric_limits<s32>::max();

    VkClearRect clear_rect{
        .rect = regs.clear_control.use_scissor ? GetScissorState(regs, 0, up_scale, down_shift)
                                               : default_scissor,
        .baseArrayLayer = regs.clear_surface.layer,
        .layerCount = layer_count,
    };
    if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
        return;
    }
    clear_rect.rect.extent = VkExtent2D{
        .width = std::min(clear_rect.rect.extent.width, render_area.width),
        .height = std::min(clear_rect.rect.extent.height, render_area.height),
    };

    const u32 color_attachment = regs.clear_surface.RT;
    if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
        const auto format =
            VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
        bool is_integer = IsPixelFormatInteger(format);
        bool is_signed = IsPixelFormatSignedInteger(format);
        size_t int_size = PixelComponentSizeBitsInteger(format);
        VkClearValue clear_value{};
        if (!is_integer) {
            std::memcpy(clear_value.color.float32, regs.clear_color.data(),
                        regs.clear_color.size() * sizeof(f32));
        } else if (!is_signed) {
            for (size_t i = 0; i < 4; i++) {
                clear_value.color.uint32[i] = static_cast<u32>(
                    static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
            }
        } else {
            for (size_t i = 0; i < 4; i++) {
                clear_value.color.int32[i] =
                    static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
                                     (regs.clear_color[i] - 0.5f));
            }
        }

        if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B &&
            regs.clear_surface.A) {
            scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
                const VkClearAttachment attachment{
                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                    .colorAttachment = color_attachment,
                    .clearValue = clear_value,
                };
                cmdbuf.ClearAttachments(attachment, clear_rect);
            });
        } else {
            u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 |
                                            regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
            Region2D dst_region = {
                Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
                Offset2D{.x = clear_rect.rect.offset.x +
                              static_cast<s32>(clear_rect.rect.extent.width),
                         .y = clear_rect.rect.offset.y +
                              static_cast<s32>(clear_rect.rect.extent.height)}};
            blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region);
        }
    }

    if (!use_depth && !use_stencil) {
        return;
    }
    VkImageAspectFlags aspect_flags = 0;
    if (use_depth && framebuffer->HasAspectDepthBit()) {
        aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
    }
    if (use_stencil && framebuffer->HasAspectStencilBit()) {
        aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    if (aspect_flags == 0) {
        return;
    }

    if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) {
        Region2D dst_region = {
            Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
            Offset2D{.x = clear_rect.rect.offset.x +
                          static_cast<s32>(clear_rect.rect.extent.width),
                     .y = clear_rect.rect.offset.y +
                          static_cast<s32>(clear_rect.rect.extent.height)}};
        blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth,
                                     static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil,
                                     regs.stencil_front_func_mask, dst_region);
    } else {
        scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
                          clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
            VkClearAttachment attachment;
            attachment.aspectMask = aspect_flags;
            attachment.colorAttachment = 0;
            attachment.clearValue.depthStencil.depth = clear_depth;
            attachment.clearValue.depthStencil.stencil = clear_stencil;
            cmdbuf.ClearAttachments(attachment, clear_rect);
        });
    }
}

void RasterizerVulkan::DispatchCompute() {
    FlushWork();
    gpu_memory->FlushCaching();

    ComputePipeline* const
        pipeline{pipeline_cache.CurrentComputePipeline()};
    if (!pipeline) {
        return;
    }
    std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
    pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);

    const auto& qmd{kepler_compute->launch_description};
    auto indirect_address = kepler_compute->GetIndirectComputeAddress();
    if (indirect_address) {
        // DispatchIndirect
        static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
        const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
        const auto [buffer, offset] =
            buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
        scheduler.RequestOutsideRenderPassOperationContext();
        scheduler.Record([indirect_buffer = buffer->Handle(),
                          indirect_offset = offset](vk::CommandBuffer cmdbuf) {
            cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset);
        });
        return;
    }
    const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
}

void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}

void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    query_cache.Query(gpu_addr, type, timestamp);
}

void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                 u32 size) {
    buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}

void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
    buffer_cache.DisableGraphicsUniformBuffer(stage, index);
}

void RasterizerVulkan::FlushAll() {}

void RasterizerVulkan::FlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
    if (addr == 0 || size == 0) {
        return;
    }
    if (True(which & VideoCommon::CacheType::TextureCache)) {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.DownloadMemory(addr, size);
    }
    if ((True(which & VideoCommon::CacheType::BufferCache))) {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.DownloadMemory(addr, size);
    }
    if ((True(which & VideoCommon::CacheType::QueryCache))) {
        query_cache.FlushRegion(addr, size);
    }
}

bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
    if ((True(which & VideoCommon::CacheType::BufferCache))) {
        std::scoped_lock lock{buffer_cache.mutex};
        if (buffer_cache.IsRegionGpuModified(addr, size)) {
            return true;
        }
    }
    if (!Settings::IsGPULevelHigh()) {
        return false;
    }
    if (True(which & VideoCommon::CacheType::TextureCache)) {
        std::scoped_lock lock{texture_cache.mutex};
        return texture_cache.IsRegionGpuModified(addr, size);
    }
    return false;
}

VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(VAddr addr, u64 size) {
    {
        std::scoped_lock lock{texture_cache.mutex};
        auto area = texture_cache.GetFlushArea(addr, size);
        if (area) {
            return *area;
        }
    }
    VideoCore::RasterizerDownloadArea new_area{
        .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
        .end_address = Common::AlignUp(addr + size, Core::Memory::YUZU_PAGESIZE),
        .preemtive = true,
    };
    return new_area;
}

void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType which) {
    if (addr == 0 || size == 0) {
        return;
    }
    if (True(which & VideoCommon::CacheType::TextureCache)) {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
    if ((True(which & VideoCommon::CacheType::BufferCache))) {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.WriteMemory(addr, size);
    }
    if ((True(which & VideoCommon::CacheType::QueryCache))) {
        query_cache.InvalidateRegion(addr, size);
    }
    if ((True(which & VideoCommon::CacheType::ShaderCache))) {
        pipeline_cache.InvalidateRegion(addr, size);
    }
}

void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
    {
        std::scoped_lock lock{texture_cache.mutex};
        for (const auto& [addr, size] : sequences) {
            texture_cache.WriteMemory(addr, size);
        }
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        for (const auto& [addr, size] : sequences) {
            buffer_cache.WriteMemory(addr, size);
        }
    }
    {
        for (const auto& [addr, size] : sequences) {
            query_cache.InvalidateRegion(addr, size);
            pipeline_cache.InvalidateRegion(addr, size);
        }
    }
}

bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
    if (addr == 0 || size == 0) {
        return false;
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        if (buffer_cache.OnCPUWrite(addr, size)) {
            return true;
        }
    }
    {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
    pipeline_cache.InvalidateRegion(addr, size);
    return false;
}

void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
    if (addr == 0 || size == 0) {
        return;
    }
    {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.WriteMemory(addr, size);
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.CachedWriteMemory(addr, size);
    }
    pipeline_cache.InvalidateRegion(addr, size);
}

void RasterizerVulkan::InvalidateGPUCache() {
    gpu.InvalidateGPUCache();
}

void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
    {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.UnmapMemory(addr, size);
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.WriteMemory(addr, size);
    }
    pipeline_cache.OnCacheInvalidation(addr, size);
}

void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
    {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.UnmapGPUMemory(as_id, addr, size);
    }
}

void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
    fence_manager.SignalFence(std::move(func));
}

void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
    fence_manager.SyncOperation(std::move(func));
}

void RasterizerVulkan::SignalSyncPoint(u32 value) {
    fence_manager.SignalSyncPoint(value);
}

void RasterizerVulkan::SignalReference() {
    fence_manager.SignalReference();
}

void RasterizerVulkan::ReleaseFences() {
    fence_manager.WaitPendingFences();
}

void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size,
                                                VideoCommon::CacheType which) {
    if (Settings::IsGPULevelExtreme()) {
        FlushRegion(addr, size, which);
    }
    InvalidateRegion(addr, size, which);
}

void RasterizerVulkan::WaitForIdle() {
    // Everything but wait pixel operations. This intentionally includes FRAGMENT_SHADER_BIT
    // because fragment shaders can still write storage buffers.
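    // Descriptive note: the code below records a SetEvent/WaitEvents pair over these stages, so
    // commands recorded after WaitForIdle do not begin until previously recorded work has drained
    // through the listed stages; this acts as a lighter-weight stand-in for a full pipeline
    // barrier here.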
    VkPipelineStageFlags flags =
        VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
        VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
        VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
        VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
    if (device.IsExtTransformFeedbackSupported()) {
        flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
    }
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetEvent(event, flags);
        cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
    });
    fence_manager.SignalOrdering();
}

void RasterizerVulkan::FragmentBarrier() {
    // We already put barriers when a render pass finishes
    scheduler.RequestOutsideRenderPassOperationContext();
}

void RasterizerVulkan::TiledCacheBarrier() {
    // TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend
}

void RasterizerVulkan::FlushCommands() {
    if (draw_counter == 0) {
        return;
    }
    draw_counter = 0;
    scheduler.Flush();
}

void RasterizerVulkan::TickFrame() {
    draw_counter = 0;
    guest_descriptor_queue.TickFrame();
    compute_pass_descriptor_queue.TickFrame();
    fence_manager.TickFrame();
    staging_pool.TickFrame();
    {
        std::scoped_lock lock{texture_cache.mutex};
        texture_cache.TickFrame();
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
        buffer_cache.TickFrame();
    }
}

bool RasterizerVulkan::AccelerateConditionalRendering() {
    gpu_memory->FlushCaching();
    if (Settings::IsGPULevelHigh()) {
        // TODO(Blinkhawk): Reimplement Host conditional rendering.
        return false;
    }
    // Medium / Low Hack: stub any checks on queries written into the buffer cache.
    const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
    Maxwell::ReportSemaphore::Compare cmp;
    if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
                                  VideoCommon::CacheType::BufferCache |
                                      VideoCommon::CacheType::QueryCache)) {
        return true;
    }
    return false;
}

bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
                                             const Tegra::Engines::Fermi2D::Surface& dst,
                                             const Tegra::Engines::Fermi2D::Config& copy_config) {
    std::scoped_lock lock{texture_cache.mutex};
    return texture_cache.BlitImage(dst, src, copy_config);
}

Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
    return accelerate_dma;
}

void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
                                                std::span<const u8> memory) {
    auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
    if (!cpu_addr) [[unlikely]] {
        gpu_memory->WriteBlock(address, memory.data(), copy_size);
        return;
    }
    gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
    {
        std::unique_lock lock{buffer_cache.mutex};
        if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
            buffer_cache.WriteMemory(*cpu_addr, copy_size);
        }
    }
    {
        std::scoped_lock lock_texture{texture_cache.mutex};
        texture_cache.WriteMemory(*cpu_addr, copy_size);
    }
    pipeline_cache.InvalidateRegion(*cpu_addr, copy_size);
    query_cache.InvalidateRegion(*cpu_addr, copy_size);
}

bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
                                         VAddr framebuffer_addr, u32 pixel_stride) {
    if (!framebuffer_addr) {
        return false;
    }
    std::scoped_lock lock{texture_cache.mutex};
    ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr);
    if (!image_view) {
        return false;
    }
    screen_info.image = image_view->ImageHandle();
    screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D);
    screen_info.width = image_view->size.width;
    screen_info.height = image_view->size.height;
    screen_info.is_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
    return true;
}

void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                         const VideoCore::DiskResourceLoadCallback& callback) {
    pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
}

void RasterizerVulkan::FlushWork() {
#ifdef ANDROID
    static constexpr u32 DRAWS_TO_DISPATCH = 1024;
#else
    static constexpr u32 DRAWS_TO_DISPATCH = 4096;
#endif // ANDROID

    // Only check multiples of 8 draws
    static_assert(DRAWS_TO_DISPATCH % 8 == 0);
    if ((++draw_counter & 7) != 7) {
        return;
    }
    if (draw_counter < DRAWS_TO_DISPATCH) {
        // Send recorded tasks to the worker thread
        scheduler.DispatchWork();
        return;
    }
    // Otherwise (every certain number of draws) flush execution.
    // This submits commands to the Vulkan driver.
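    // Descriptive note: only every eighth call gets past the early return above, which keeps the
    // per-draw bookkeeping cheap; below DRAWS_TO_DISPATCH such a checkpoint merely hands recorded
    // work to the worker thread, and once the counter passes DRAWS_TO_DISPATCH the scheduler is
    // flushed and the counter restarts from zero.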
    scheduler.Flush();
    draw_counter = 0;
}

AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_,
                             Scheduler& scheduler_)
    : buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, scheduler{scheduler_} {}

bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
    std::scoped_lock lock{buffer_cache.mutex};
    return buffer_cache.DMAClear(src_address, amount, value);
}

bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
    std::scoped_lock lock{buffer_cache.mutex};
    return buffer_cache.DMACopy(src_address, dest_address, amount);
}

template <bool IS_IMAGE_UPLOAD>
bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
                                       const Tegra::DMA::BufferOperand& buffer_operand,
                                       const Tegra::DMA::ImageOperand& image_operand) {
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    const auto image_id = texture_cache.DmaImageId(image_operand, IS_IMAGE_UPLOAD);
    if (image_id == VideoCommon::NULL_IMAGE_ID) {
        return false;
    }
    const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
    static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
    const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
                                         : VideoCommon::ObtainBufferOperation::MarkAsWritten;
    const auto [buffer, offset] =
        buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);

    const auto [image, copy] = texture_cache.DmaBufferImageCopy(
        copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD);
    const std::span copy_span{&copy, 1};

    if constexpr (IS_IMAGE_UPLOAD) {
        texture_cache.PrepareImage(image_id, true, false);
        image->UploadMemory(buffer->Handle(), offset, copy_span);
    } else {
        if (offset % BytesPerBlock(image->info.format)) {
            return false;
        }
        texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
                                              buffer_operand.address, buffer_size);
    }
    return true;
}

bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info,
                                  const Tegra::DMA::ImageOperand& image_operand,
                                  const Tegra::DMA::BufferOperand& buffer_operand) {
    return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand);
}

bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
                                  const Tegra::DMA::BufferOperand& buffer_operand,
                                  const Tegra::DMA::ImageOperand& image_operand) {
    return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand);
}

void RasterizerVulkan::UpdateDynamicStates() {
    auto& regs = maxwell3d->regs;
    UpdateViewportsState(regs);
    UpdateScissorsState(regs);
    UpdateDepthBias(regs);
    UpdateBlendConstants(regs);
    UpdateDepthBounds(regs);
    UpdateStencilFaces(regs);
    UpdateLineWidth(regs);
    if (device.IsExtExtendedDynamicStateSupported()) {
        UpdateCullMode(regs);
        UpdateDepthCompareOp(regs);
        UpdateFrontFace(regs);
        UpdateStencilOp(regs);

        if (device.IsExtVertexInputDynamicStateSupported()) {
            UpdateVertexInput(regs);
        }

        if (state_tracker.TouchStateEnable()) {
            UpdateDepthBoundsTestEnable(regs);
            UpdateDepthTestEnable(regs);
            UpdateDepthWriteEnable(regs);
            UpdateStencilTestEnable(regs);
            if (device.IsExtExtendedDynamicState2Supported()) {
                UpdatePrimitiveRestartEnable(regs);
                UpdateRasterizerDiscardEnable(regs);
                UpdateDepthBiasEnable(regs);
            }
            if (device.IsExtExtendedDynamicState3EnablesSupported()) {
                UpdateLogicOpEnable(regs);
                UpdateDepthClampEnable(regs);
            }
        }
        if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
            UpdateLogicOp(regs);
        }
        if (device.IsExtExtendedDynamicState3Supported()) {
            UpdateBlending(regs);
        }
    }
}

void
RasterizerVulkan::BeginTransformFeedback() {
    const auto& regs = maxwell3d->regs;
    if (regs.transform_feedback_enabled == 0) {
        return;
    }
    if (!device.IsExtTransformFeedbackSupported()) {
        LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
        return;
    }
    UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
                     regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
    scheduler.Record(
        [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}

void RasterizerVulkan::EndTransformFeedback() {
    const auto& regs = maxwell3d->regs;
    if (regs.transform_feedback_enabled == 0) {
        return;
    }
    if (!device.IsExtTransformFeedbackSupported()) {
        return;
    }
    scheduler.Record(
        [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
}

void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchViewports()) {
        return;
    }
    if (!regs.viewport_scale_offset_enabled) {
        const auto x = static_cast<float>(regs.surface_clip.x);
        const auto y = static_cast<float>(regs.surface_clip.y);
        const auto width = static_cast<float>(regs.surface_clip.width);
        const auto height = static_cast<float>(regs.surface_clip.height);
        VkViewport viewport{
            .x = x,
            .y = y,
            .width = width != 0.0f ? width : 1.0f,
            .height = height != 0.0f ? height : 1.0f,
            .minDepth = 0.0f,
            .maxDepth = 1.0f,
        };
        scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); });
        return;
    }
    const bool is_rescaling{texture_cache.IsRescaling()};
    const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
    const std::array viewport_list{
        GetViewportState(device, regs, 0, scale),  GetViewportState(device, regs, 1, scale),
        GetViewportState(device, regs, 2, scale),  GetViewportState(device, regs, 3, scale),
        GetViewportState(device, regs, 4, scale),  GetViewportState(device, regs, 5, scale),
        GetViewportState(device, regs, 6, scale),  GetViewportState(device, regs, 7, scale),
        GetViewportState(device, regs, 8, scale),  GetViewportState(device, regs, 9, scale),
        GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale),
        GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
        GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
    };
    scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) {
        const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
        const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports);
        cmdbuf.SetViewport(0, viewports);
    });
}

void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchScissors()) {
        return;
    }
    u32 up_scale = 1;
    u32 down_shift = 0;
    if (texture_cache.IsRescaling()) {
        up_scale = Settings::values.resolution_info.up_scale;
        down_shift = Settings::values.resolution_info.down_shift;
    }
    const std::array scissor_list{
        GetScissorState(regs, 0, up_scale, down_shift),
        GetScissorState(regs, 1, up_scale, down_shift),
        GetScissorState(regs, 2, up_scale, down_shift),
        GetScissorState(regs, 3, up_scale, down_shift),
        GetScissorState(regs, 4, up_scale, down_shift),
        GetScissorState(regs, 5, up_scale, down_shift),
        GetScissorState(regs, 6, up_scale, down_shift),
        GetScissorState(regs, 7, up_scale, down_shift),
        GetScissorState(regs, 8, up_scale, down_shift),
        GetScissorState(regs, 9, up_scale, down_shift),
        GetScissorState(regs, 10, up_scale, down_shift),
        GetScissorState(regs, 11, up_scale, down_shift),
        GetScissorState(regs, 12, up_scale, down_shift),
        GetScissorState(regs, 13, up_scale, down_shift),
        GetScissorState(regs, 14, up_scale, down_shift),
        GetScissorState(regs, 15, up_scale, down_shift),
    };
    scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) {
        const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
        const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors);
        cmdbuf.SetScissor(0, scissors);
    });
}

void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBias()) {
        return;
    }
    float units = regs.depth_bias / 2.0f;
    const bool is_d24 = regs.zeta.format == Tegra::DepthFormat::Z24_UNORM_S8_UINT ||
                        regs.zeta.format == Tegra::DepthFormat::X8Z24_UNORM ||
                        regs.zeta.format == Tegra::DepthFormat::S8Z24_UNORM ||
                        regs.zeta.format == Tegra::DepthFormat::V8Z24_UNORM;
    if (is_d24 && !device.SupportsD24DepthBuffer()) {
        // the base formulas can be obtained from here:
        //   https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
        const double rescale_factor =
            static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
        units = static_cast<float>(static_cast<double>(units) * rescale_factor);
    }
    scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
                      factor = regs.slope_scale_depth_bias](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthBias(constant, clamp, factor);
    });
}

void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchBlendConstants()) {
        return;
    }
    const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
                                    regs.blend_color.a};
    scheduler.Record(
        [blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
}

void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBounds()) {
        return;
    }
    scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
                         vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
}

void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchStencilProperties()) {
        return;
    }
    bool update_references = state_tracker.TouchStencilReference();
    bool update_write_mask = state_tracker.TouchStencilWriteMask();
    bool update_compare_masks = state_tracker.TouchStencilCompare();
    if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
        update_references = true;
        update_write_mask = true;
        update_compare_masks = true;
    }
    if (update_references) {
        [&]() {
            if (regs.stencil_two_side_enable) {
                if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
                    !state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
                    return;
                }
            } else {
                if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
                    return;
                }
            }
            scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
                              two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
                const bool set_back = two_sided && front_ref != back_ref;
                // Front face
                cmdbuf.SetStencilReference(set_back ?
                                               VK_STENCIL_FACE_FRONT_BIT
                                                    : VK_STENCIL_FACE_FRONT_AND_BACK,
                                           front_ref);
                if (set_back) {
                    cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
                }
            });
        }();
    }
    if (update_write_mask) {
        [&]() {
            if (regs.stencil_two_side_enable) {
                if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
                    !state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
                    return;
                }
            } else {
                if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
                    return;
                }
            }
            scheduler.Record([front_write_mask = regs.stencil_front_mask,
                              back_write_mask = regs.stencil_back_mask,
                              two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
                const bool set_back = two_sided && front_write_mask != back_write_mask;
                // Front face
                cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
                                                    : VK_STENCIL_FACE_FRONT_AND_BACK,
                                           front_write_mask);
                if (set_back) {
                    cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
                }
            });
        }();
    }
    if (update_compare_masks) {
        [&]() {
            if (regs.stencil_two_side_enable) {
                if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
                    !state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
                    return;
                }
            } else {
                if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
                    return;
                }
            }
            scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
                              back_test_mask = regs.stencil_back_func_mask,
                              two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
                const bool set_back = two_sided && front_test_mask != back_test_mask;
                // Front face
                cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
                                                      : VK_STENCIL_FACE_FRONT_AND_BACK,
                                             front_test_mask);
                if (set_back) {
                    cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
                }
            });
        }();
    }
    state_tracker.ClearStencilReset();
}

void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchLineWidth()) {
        return;
    }
    const float width =
        regs.line_anti_alias_enable ? regs.line_width_smooth : regs.line_width_aliased;
    scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
}

void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchCullMode()) {
        return;
    }
    scheduler.Record([enabled = regs.gl_cull_test_enabled,
                      cull_face = regs.gl_cull_face](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetCullModeEXT(enabled ?
                                        MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE);
    });
}

void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBoundsTestEnable()) {
        return;
    }
    bool enabled = regs.depth_bounds_enable;
    if (enabled && !device.IsDepthBoundsSupported()) {
        LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
        enabled = false;
    }
    scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthBoundsTestEnableEXT(enable);
    });
}

void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthTestEnable()) {
        return;
    }
    scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthTestEnableEXT(enable);
    });
}

void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthWriteEnable()) {
        return;
    }
    scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthWriteEnableEXT(enable);
    });
}

void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchPrimitiveRestartEnable()) {
        return;
    }
    scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetPrimitiveRestartEnableEXT(enable);
    });
}

void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchRasterizerDiscardEnable()) {
        return;
    }
    scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0);
    });
}

void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthBiasEnable()) {
        return;
    }
    constexpr size_t POINT = 0;
    constexpr size_t LINE = 1;
    constexpr size_t POLYGON = 2;
    static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
        POINT,   // Points
        LINE,    // Lines
        LINE,    // LineLoop
        LINE,    // LineStrip
        POLYGON, // Triangles
        POLYGON, // TriangleStrip
        POLYGON, // TriangleFan
        POLYGON, // Quads
        POLYGON, // QuadStrip
        POLYGON, // Polygon
        LINE,    // LinesAdjacency
        LINE,    // LineStripAdjacency
        POLYGON, // TrianglesAdjacency
        POLYGON, // TriangleStripAdjacency
        POLYGON, // Patches
    };
    const std::array enabled_lut{
        regs.polygon_offset_point_enable,
        regs.polygon_offset_line_enable,
        regs.polygon_offset_fill_enable,
    };
    const u32 topology_index =
        static_cast<u32>(maxwell3d->draw_manager->GetDrawState().topology);
    const u32 enable = enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]];
    scheduler.Record(
        [enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); });
}

void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchLogicOpEnable()) {
        return;
    }
    scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetLogicOpEnableEXT(enable != 0);
    });
}

void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthClampEnable()) {
        return;
    }
    bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
                            Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
                        regs.viewport_clip_control.geometry_clip ==
                            Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
                        regs.viewport_clip_control.geometry_clip ==
                            Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
    scheduler.Record(
        [is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
}

void
RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchDepthCompareOp()) {
        return;
    }
    scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func));
    });
}

void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchFrontFace()) {
        return;
    }
    VkFrontFace front_face = MaxwellToVK::FrontFace(regs.gl_front_face);
    if (regs.window_origin.flip_y != 0) {
        front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_COUNTER_CLOCKWISE
                                                           : VK_FRONT_FACE_CLOCKWISE;
    }
    scheduler.Record(
        [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); });
}

void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchStencilOp()) {
        return;
    }
    const Maxwell::StencilOp::Op fail = regs.stencil_front_op.fail;
    const Maxwell::StencilOp::Op zfail = regs.stencil_front_op.zfail;
    const Maxwell::StencilOp::Op zpass = regs.stencil_front_op.zpass;
    const Maxwell::ComparisonOp compare = regs.stencil_front_op.func;
    if (regs.stencil_two_side_enable) {
        // Separate stencil op per face
        const Maxwell::StencilOp::Op back_fail = regs.stencil_back_op.fail;
        const Maxwell::StencilOp::Op back_zfail = regs.stencil_back_op.zfail;
        const Maxwell::StencilOp::Op back_zpass = regs.stencil_back_op.zpass;
        const Maxwell::ComparisonOp back_compare = regs.stencil_back_op.func;
        scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass,
                          back_compare](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail),
                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
                                   MaxwellToVK::ComparisonOp(compare));
            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail),
                                   MaxwellToVK::StencilOp(back_zpass),
                                   MaxwellToVK::StencilOp(back_zfail),
                                   MaxwellToVK::ComparisonOp(back_compare));
        });
    } else {
        // Front face defines the stencil op of both faces
        scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
                                   MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
                                   MaxwellToVK::ComparisonOp(compare));
        });
    }
}

void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchLogicOp()) {
        return;
    }
    const auto op_value = static_cast<u32>(regs.logic_op.op);
    auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
                                                      : VK_LOGIC_OP_NO_OP;
    scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
}

void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchBlending()) {
        return;
    }

    if (state_tracker.TouchColorMask()) {
        std::array<VkColorComponentFlags, Maxwell::NumRenderTargets> setup_masks{};
        for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
            const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
            auto& current = setup_masks[index];
            if (mask.R) {
                current |= VK_COLOR_COMPONENT_R_BIT;
            }
            if (mask.G) {
                current |= VK_COLOR_COMPONENT_G_BIT;
            }
            if (mask.B) {
                current |= VK_COLOR_COMPONENT_B_BIT;
            }
            if (mask.A) {
                current |= VK_COLOR_COMPONENT_A_BIT;
            }
        }
        scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetColorWriteMaskEXT(0, setup_masks);
        });
    }

    if (state_tracker.TouchBlendEnable()) {
        std::array<VkBool32, Maxwell::NumRenderTargets> setup_enables{};
        std::ranges::transform(
            regs.blend.enable, setup_enables.begin(),
            [&](const auto& is_enabled) { return is_enabled != 0 ? VK_TRUE : VK_FALSE; });
        scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetColorBlendEnableEXT(0, setup_enables);
        });
    }

    if (state_tracker.TouchBlendEquations()) {
        std::array<VkColorBlendEquationEXT, Maxwell::NumRenderTargets> setup_blends{};
        for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
            const auto blend_setup = [&]<typename T>(const T& guest_blend) {
                auto& host_blend = setup_blends[index];
                host_blend.srcColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_source);
                host_blend.dstColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_dest);
                host_blend.colorBlendOp = MaxwellToVK::BlendEquation(guest_blend.color_op);
                host_blend.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_source);
                host_blend.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_dest);
                host_blend.alphaBlendOp = MaxwellToVK::BlendEquation(guest_blend.alpha_op);
            };
            if (!regs.blend_per_target_enabled) {
                blend_setup(regs.blend);
                continue;
            }
            blend_setup(regs.blend_per_target[index]);
        }
        scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) {
            cmdbuf.SetColorBlendEquationEXT(0, setup_blends);
        });
    }
}

void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchStencilTestEnable()) {
        return;
    }
    scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetStencilTestEnableEXT(enable);
    });
}

void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
    auto& dirty{maxwell3d->dirty.flags};
    if (!dirty[Dirty::VertexInput]) {
        return;
    }
    dirty[Dirty::VertexInput] = false;

    boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
    boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;

    // There seems to be a bug on Nvidia's driver where updating only higher attributes ends up
    // generating dirty state. Track the highest dirty attribute and update all attributes until
    // that one.
    size_t highest_dirty_attr{};
    for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
        if (dirty[Dirty::VertexAttribute0 + index]) {
            highest_dirty_attr = index;
        }
    }
    for (size_t index = 0; index < highest_dirty_attr; ++index) {
        const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
        const u32 binding{attribute.buffer};
        dirty[Dirty::VertexAttribute0 + index] = false;
        dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
        if (!attribute.constant) {
            attributes.push_back({
                .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
                .pNext = nullptr,
                .location = static_cast<u32>(index),
                .binding = binding,
                .format = MaxwellToVK::VertexFormat(device, attribute.type, attribute.size),
                .offset = attribute.offset,
            });
        }
    }
    for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
        if (!dirty[Dirty::VertexBinding0 + index]) {
            continue;
        }
        dirty[Dirty::VertexBinding0 + index] = false;

        const u32 binding{static_cast<u32>(index)};
        const auto& input_binding{regs.vertex_streams[binding]};
        const bool is_instanced{regs.vertex_stream_instances.IsInstancingEnabled(binding)};
        bindings.push_back({
            .sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
            .pNext = nullptr,
            .binding = binding,
            .stride = input_binding.stride,
            .inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
            .divisor = is_instanced ?
                                      input_binding.frequency : 1,
        });
    }
    scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
        cmdbuf.SetVertexInputEXT(bindings, attributes);
    });
}

void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
    CreateChannel(channel);
    {
        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.CreateChannel(channel);
        buffer_cache.CreateChannel(channel);
    }
    pipeline_cache.CreateChannel(channel);
    query_cache.CreateChannel(channel);
    state_tracker.SetupTables(channel);
}

void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
    const s32 channel_id = channel.bind_id;
    BindToChannel(channel_id);
    {
        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.BindToChannel(channel_id);
        buffer_cache.BindToChannel(channel_id);
    }
    pipeline_cache.BindToChannel(channel_id);
    query_cache.BindToChannel(channel_id);
    state_tracker.ChangeChannel(channel);
    state_tracker.InvalidateState();
}

void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
    EraseChannel(channel_id);
    {
        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
        texture_cache.EraseChannel(channel_id);
        buffer_cache.EraseChannel(channel_id);
    }
    pipeline_cache.EraseChannel(channel_id);
    query_cache.EraseChannel(channel_id);
}

} // namespace Vulkan