diff options
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_32.cpp | 4 | ||||
-rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic_64.cpp | 4 | ||||
-rw-r--r-- | src/shader_recompiler/backend/spirv/emit_spirv.h | 2 | ||||
-rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate_program.cpp | 4 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 3 | ||||
-rw-r--r-- | src/shader_recompiler/ir_opt/rescaling_pass.cpp | 29 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 42 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 35 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_compute_pass.cpp | 3 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 3 | ||||
-rw-r--r-- | src/yuzu/configuration/config.cpp | 2 |
11 files changed, 117 insertions, 14 deletions
diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index 286976623..c1c843b8f 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -148,8 +148,8 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable* config.wall_clock_cntpct = uses_wall_clock; // Code cache size - config.code_cache_size = 512_MiB; - config.far_code_offset = 400_MiB; + config.code_cache_size = 128_MiB; + config.far_code_offset = 100_MiB; // Safe optimizations if (Settings::values.cpu_debug_mode) { diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index d96226c41..aa74fce4d 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -208,8 +208,8 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable* config.wall_clock_cntpct = uses_wall_clock; // Code cache size - config.code_cache_size = 512_MiB; - config.far_code_offset = 400_MiB; + config.code_cache_size = 128_MiB; + config.far_code_offset = 100_MiB; // Safe optimizations if (Settings::values.cpu_debug_mode) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index b412957c7..2b360e073 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h @@ -22,7 +22,7 @@ constexpr u32 NUM_TEXTURE_AND_IMAGE_SCALING_WORDS = struct RescalingLayout { alignas(16) std::array<u32, NUM_TEXTURE_SCALING_WORDS> rescaling_textures; alignas(16) std::array<u32, NUM_IMAGE_SCALING_WORDS> rescaling_images; - alignas(16) u32 down_factor; + u32 down_factor; }; constexpr u32 RESCALING_LAYOUT_WORDS_OFFSET = offsetof(RescalingLayout, rescaling_textures); constexpr u32 RESCALING_LAYOUT_DOWN_FACTOR_OFFSET = offsetof(RescalingLayout, down_factor); diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 248ad3ced..b22725584 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp @@ -212,11 +212,11 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo } Optimization::SsaRewritePass(program); + Optimization::ConstantPropagationPass(program); + Optimization::GlobalMemoryToStorageBufferPass(program); Optimization::TexturePass(env, program); - Optimization::ConstantPropagationPass(program); - if (Settings::values.resolution_info.active) { Optimization::RescalingPass(program); } diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 38592afd0..ddf497e32 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -334,7 +334,8 @@ std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) { /// Tries to track the storage buffer address used by a global memory instruction std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) { const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> { - if (inst->GetOpcode() != IR::Opcode::GetCbufU32) { + if (inst->GetOpcode() != IR::Opcode::GetCbufU32 && + inst->GetOpcode() != IR::Opcode::GetCbufU32x2) { return std::nullopt; } const IR::Value index{inst->Arg(0)}; diff --git a/src/shader_recompiler/ir_opt/rescaling_pass.cpp b/src/shader_recompiler/ir_opt/rescaling_pass.cpp index c28500dd1..496d4667e 100644 --- a/src/shader_recompiler/ir_opt/rescaling_pass.cpp +++ b/src/shader_recompiler/ir_opt/rescaling_pass.cpp @@ -183,6 +183,31 @@ void ScaleIntegerComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_s } } +void ScaleIntegerOffsetComposite(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled, + size_t index) { + const IR::Value composite{inst.Arg(index)}; + if (composite.IsEmpty()) { + return; + } + const auto info{inst.Flags<IR::TextureInstInfo>()}; + const IR::U32 x{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 0)})}; + const IR::U32 y{Scale(ir, is_scaled, IR::U32{ir.CompositeExtract(composite, 1)})}; + switch (info.type) { + case TextureType::ColorArray2D: + case TextureType::Color2D: + inst.SetArg(index, ir.CompositeConstruct(x, y)); + break; + case TextureType::Color1D: + case TextureType::ColorArray1D: + case TextureType::Color3D: + case TextureType::ColorCube: + case TextureType::ColorArrayCube: + case TextureType::Buffer: + // Nothing to patch here + break; + } +} + void SubScaleCoord(IR::IREmitter& ir, IR::Inst& inst, const IR::U1& is_scaled) { const auto info{inst.Flags<IR::TextureInstInfo>()}; const IR::Value coord{inst.Arg(1)}; @@ -220,7 +245,7 @@ void SubScaleImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; SubScaleCoord(ir, inst, is_scaled); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void SubScaleImageRead(IR::Block& block, IR::Inst& inst) { @@ -242,7 +267,7 @@ void PatchImageFetch(IR::Block& block, IR::Inst& inst) { const IR::U1 is_scaled{ir.IsTextureScaled(ir.Imm32(info.descriptor_index))}; ScaleIntegerComposite(ir, inst, is_scaled, 1); // Scale ImageFetch offset - ScaleIntegerComposite(ir, inst, is_scaled, 2); + ScaleIntegerOffsetComposite(ir, inst, is_scaled, 2); } void PatchImageRead(IR::Block& block, IR::Inst& inst) { diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 5d6d217bb..8f2fd28c2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,6 +7,7 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" +#include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -208,6 +209,14 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume return ProcessCBBind(4); case MAXWELL3D_REG_INDEX(draw.vertex_end_gl): return DrawArrays(); + case MAXWELL3D_REG_INDEX(small_index): + regs.index_array.count = regs.small_index.count; + regs.index_array.first = regs.small_index.first; + dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + return DrawArrays(); + case MAXWELL3D_REG_INDEX(topology_override): + use_topology_override = true; + return; case MAXWELL3D_REG_INDEX(clear_buffers): return ProcessClearBuffers(); case MAXWELL3D_REG_INDEX(query.query_get): @@ -360,6 +369,35 @@ void Maxwell3D::CallMethodFromMME(u32 method, u32 method_argument) { } } +void Maxwell3D::ProcessTopologyOverride() { + using PrimitiveTopology = Maxwell3D::Regs::PrimitiveTopology; + using PrimitiveTopologyOverride = Maxwell3D::Regs::PrimitiveTopologyOverride; + + PrimitiveTopology topology{}; + + switch (regs.topology_override) { + case PrimitiveTopologyOverride::None: + topology = regs.draw.topology; + break; + case PrimitiveTopologyOverride::Points: + topology = PrimitiveTopology::Points; + break; + case PrimitiveTopologyOverride::Lines: + topology = PrimitiveTopology::Lines; + break; + case PrimitiveTopologyOverride::LineStrip: + topology = PrimitiveTopology::LineStrip; + break; + default: + topology = static_cast<PrimitiveTopology>(regs.topology_override); + break; + } + + if (use_topology_override) { + regs.draw.topology.Assign(topology); + } +} + void Maxwell3D::FlushMMEInlineDraw() { LOG_TRACE(HW_GPU, "called, topology={}, count={}", regs.draw.topology.Value(), regs.vertex_buffer.count); @@ -370,6 +408,8 @@ void Maxwell3D::FlushMMEInlineDraw() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { rasterizer->Draw(is_indexed, true); @@ -529,6 +569,8 @@ void Maxwell3D::DrawArrays() { ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); + ProcessTopologyOverride(); + if (regs.draw.instance_next) { // Increment the current instance *before* drawing. state.current_instance += 1; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index dc9df6c8b..6d34da046 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -367,6 +367,22 @@ public: Patches = 0xe, }; + // Constants as from NVC0_3D_UNK1970_D3D + // https://gitlab.freedesktop.org/mesa/mesa/-/blob/main/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h#L1598 + enum class PrimitiveTopologyOverride : u32 { + None = 0x0, + Points = 0x1, + Lines = 0x2, + LineStrip = 0x3, + Triangles = 0x4, + TriangleStrip = 0x5, + LinesAdjacency = 0xa, + LineStripAdjacency = 0xb, + TrianglesAdjacency = 0xc, + TriangleStripAdjacency = 0xd, + Patches = 0xe, + }; + enum class IndexFormat : u32 { UnsignedByte = 0x0, UnsignedShort = 0x1, @@ -1200,7 +1216,12 @@ public: } } index_array; - INSERT_PADDING_WORDS_NOINIT(0x7); + union { + BitField<0, 16, u32> first; + BitField<16, 16, u32> count; + } small_index; + + INSERT_PADDING_WORDS_NOINIT(0x6); INSERT_PADDING_WORDS_NOINIT(0x1F); @@ -1244,7 +1265,11 @@ public: BitField<11, 1, u32> depth_clamp_disabled; } view_volume_clip_control; - INSERT_PADDING_WORDS_NOINIT(0x1F); + INSERT_PADDING_WORDS_NOINIT(0xC); + + PrimitiveTopologyOverride topology_override; + + INSERT_PADDING_WORDS_NOINIT(0x12); u32 depth_bounds_enable; @@ -1531,6 +1556,9 @@ private: /// Handles a write to the VERTEX_END_GL register, triggering a draw. void DrawArrays(); + /// Handles use of topology overrides (e.g., to avoid using a topology assigned from a macro) + void ProcessTopologyOverride(); + // Handles a instance drawcall from MME void StepInstance(MMEDrawMode expected_mode, u32 count); @@ -1569,6 +1597,7 @@ private: Upload::State upload_state; bool execute_on{true}; + bool use_topology_override{false}; }; #define ASSERT_REG_POSITION(field_name, position) \ @@ -1685,6 +1714,7 @@ ASSERT_REG_POSITION(draw, 0x585); ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(provoking_vertex_last, 0x5A1); ASSERT_REG_POSITION(index_array, 0x5F2); +ASSERT_REG_POSITION(small_index, 0x5F9); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); ASSERT_REG_POSITION(vp_point_size, 0x644); @@ -1694,6 +1724,7 @@ ASSERT_REG_POSITION(cull_face, 0x648); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); ASSERT_REG_POSITION(view_volume_clip_control, 0x64F); +ASSERT_REG_POSITION(topology_override, 0x65C); ASSERT_REG_POSITION(depth_bounds_enable, 0x66F); ASSERT_REG_POSITION(logic_op, 0x671); ASSERT_REG_POSITION(clear_buffers, 0x674); diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 3e96c0f60..4d73427b4 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstring> #include <memory> #include <optional> @@ -292,7 +293,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, }; - const std::array push_constants{base_vertex, index_shift}; + const std::array<u32, 2> push_constants{base_vertex, index_shift}; const VkDescriptorSet set = descriptor_allocator.Commit(); device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 0f62779de..ca6019a3a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1067,7 +1067,8 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im } break; case PixelFormat::A8B8G8R8_UNORM: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { + if (src_view.format == PixelFormat::S8_UINT_D24_UNORM || + src_view.format == PixelFormat::D24_UNORM_S8_UINT) { return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); } break; diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index c2b66ff14..4b943c6ba 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -1155,6 +1155,8 @@ void Config::SaveCpuValues() { WriteBasicSetting(Settings::values.cpuopt_misc_ir); WriteBasicSetting(Settings::values.cpuopt_reduce_misalign_checks); WriteBasicSetting(Settings::values.cpuopt_fastmem); + WriteBasicSetting(Settings::values.cpuopt_fastmem_exclusives); + WriteBasicSetting(Settings::values.cpuopt_recompile_exclusives); } qt_config->endGroup(); |