diff options
Diffstat (limited to 'src')
22 files changed, 166 insertions, 164 deletions
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp index f53a8d193..200c6489a 100644 --- a/src/common/telemetry.cpp +++ b/src/common/telemetry.cpp @@ -44,20 +44,6 @@ template class Field<std::string>; template class Field<const char*>; template class Field<std::chrono::microseconds>; -#ifdef ARCHITECTURE_x86_64 -static const char* CpuVendorToStr(Common::CPUVendor vendor) { - switch (vendor) { - case Common::CPUVendor::INTEL: - return "Intel"; - case Common::CPUVendor::AMD: - return "Amd"; - case Common::CPUVendor::OTHER: - return "Other"; - } - UNREACHABLE(); -} -#endif - void AppendBuildInfo(FieldCollection& fc) { const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr}; fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty); @@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) { #ifdef ARCHITECTURE_x86_64 fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string); fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string); - fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor)); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx); fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2); diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 2dfcd39c8..c9349a6b4 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -3,8 +3,6 @@ // Refer to the license.txt file included. #include <cstring> -#include <string> -#include <thread> #include "common/common_types.h" #include "common/x64/cpu_detect.h" @@ -51,8 +49,6 @@ namespace Common { static CPUCaps Detect() { CPUCaps caps = {}; - caps.num_cores = std::thread::hardware_concurrency(); - // Assumes the CPU supports the CPUID instruction. 
Those that don't would likely not support // yuzu at all anyway @@ -70,12 +66,6 @@ static CPUCaps Detect() { __cpuid(cpu_id, 0x80000000); u32 max_ex_fn = cpu_id[0]; - if (!strcmp(caps.brand_string, "GenuineIntel")) - caps.vendor = CPUVendor::INTEL; - else if (!strcmp(caps.brand_string, "AuthenticAMD")) - caps.vendor = CPUVendor::AMD; - else - caps.vendor = CPUVendor::OTHER; // Set reasonable default brand string even if brand string not available strcpy(caps.cpu_string, caps.brand_string); @@ -96,15 +86,9 @@ static CPUCaps Detect() { caps.sse4_1 = true; if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true; - if ((cpu_id[2] >> 22) & 1) - caps.movbe = true; if ((cpu_id[2] >> 25) & 1) caps.aes = true; - if ((cpu_id[3] >> 24) & 1) { - caps.fxsave_fxrstor = true; - } - // AVX support requires 3 separate checks: // - Is the AVX bit set in CPUID? // - Is the XSAVE bit set in CPUID? @@ -129,8 +113,6 @@ static CPUCaps Detect() { } } - caps.flush_to_zero = caps.sse; - if (max_ex_fn >= 0x80000004) { // Extract CPU model string __cpuid(cpu_id, 0x80000002); @@ -144,14 +126,8 @@ static CPUCaps Detect() { if (max_ex_fn >= 0x80000001) { // Check for more features __cpuid(cpu_id, 0x80000001); - if (cpu_id[2] & 1) - caps.lahf_sahf_64 = true; - if ((cpu_id[2] >> 5) & 1) - caps.lzcnt = true; if ((cpu_id[2] >> 16) & 1) caps.fma4 = true; - if ((cpu_id[3] >> 29) & 1) - caps.long_mode = true; } return caps; @@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() { return caps; } -std::string GetCPUCapsString() { - auto caps = GetCPUCaps(); - - std::string sum(caps.cpu_string); - sum += " ("; - sum += caps.brand_string; - sum += ")"; - - if (caps.sse) - sum += ", SSE"; - if (caps.sse2) { - sum += ", SSE2"; - if (!caps.flush_to_zero) - sum += " (without DAZ)"; - } - - if (caps.sse3) - sum += ", SSE3"; - if (caps.ssse3) - sum += ", SSSE3"; - if (caps.sse4_1) - sum += ", SSE4.1"; - if (caps.sse4_2) - sum += ", SSE4.2"; - if (caps.avx) - sum += ", AVX"; - if (caps.avx2) - sum += ", AVX2"; - if (caps.bmi1) - 
sum += ", BMI1"; - if (caps.bmi2) - sum += ", BMI2"; - if (caps.fma) - sum += ", FMA"; - if (caps.aes) - sum += ", AES"; - if (caps.movbe) - sum += ", MOVBE"; - if (caps.long_mode) - sum += ", 64-bit support"; - - return sum; -} - } // namespace Common diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h index 0af3a8adb..20f2ba234 100644 --- a/src/common/x64/cpu_detect.h +++ b/src/common/x64/cpu_detect.h @@ -4,23 +4,12 @@ #pragma once -#include <string> - namespace Common { -/// x86/x64 CPU vendors that may be detected by this module -enum class CPUVendor { - INTEL, - AMD, - OTHER, -}; - /// x86/x64 CPU capabilities that may be detected by this module struct CPUCaps { - CPUVendor vendor; char cpu_string[0x21]; char brand_string[0x41]; - int num_cores; bool sse; bool sse2; bool sse3; @@ -35,20 +24,6 @@ struct CPUCaps { bool fma; bool fma4; bool aes; - - // Support for the FXSAVE and FXRSTOR instructions - bool fxsave_fxrstor; - - bool movbe; - - // This flag indicates that the hardware supports some mode in which denormal inputs and outputs - // are automatically set to (signed) zero. - bool flush_to_zero; - - // Support for LAHF and SAHF instructions in 64-bit mode - bool lahf_sahf_64; - - bool long_mode; }; /** @@ -57,10 +32,4 @@ struct CPUCaps { */ const CPUCaps& GetCPUCaps(); -/** - * Gets a string summary of the name and supported capabilities of the host CPU - * @return String summary - */ -std::string GetCPUCapsString(); - } // namespace Common diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h index 090565310..b689e8e8b 100644 --- a/src/core/hle/kernel/physical_memory.h +++ b/src/core/hle/kernel/physical_memory.h @@ -14,6 +14,9 @@ namespace Kernel { // - Second to ensure all host backing memory used is aligned to 256 bytes due // to strict alignment restrictions on GPU memory. 
-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; +using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>; +class PhysicalMemory final : public PhysicalMemoryVector { + using PhysicalMemoryVector::PhysicalMemoryVector; +}; } // namespace Kernel diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index a9a20ef76..0b3500fce 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <cstring> #include <iterator> #include <utility> #include "common/alignment.h" @@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) { // If necessary, expand backing vector to cover new heap extents in // the case of allocating. Otherwise, shrink the backing memory, // if a smaller heap has been requested. - const u64 old_heap_size = GetCurrentHeapSize(); - if (size > old_heap_size) { - const u64 alloc_size = size - old_heap_size; - - heap_memory->insert(heap_memory->end(), alloc_size, 0); - RefreshMemoryBlockMappings(heap_memory.get()); - } else if (size < old_heap_size) { - heap_memory->resize(size); - heap_memory->shrink_to_fit(); - - RefreshMemoryBlockMappings(heap_memory.get()); - } + heap_memory->resize(size); + heap_memory->shrink_to_fit(); + RefreshMemoryBlockMappings(heap_memory.get()); heap_end = heap_region_base + size; ASSERT(GetCurrentHeapSize() == heap_memory->size()); @@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre // Always merge allocated memory blocks, even when they don't share the same backing block. if (left.type == VMAType::AllocatedMemoryBlock && (left.backing_block != right.backing_block || left.offset + left.size != right.offset)) { - const auto right_begin = right.backing_block->begin() + right.offset; - const auto right_end = right_begin + right.size; // Check if we can save work. 
if (left.offset == 0 && left.size == left.backing_block->size()) { // Fast case: left is an entire backing block. - left.backing_block->insert(left.backing_block->end(), right_begin, right_end); + left.backing_block->resize(left.size + right.size); + std::memcpy(left.backing_block->data() + left.size, + right.backing_block->data() + right.offset, right.size); } else { // Slow case: make a new memory block for left and right. - const auto left_begin = left.backing_block->begin() + left.offset; - const auto left_end = left_begin + left.size; - const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end)); - const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end)); - auto new_memory = std::make_shared<PhysicalMemory>(); - new_memory->reserve(left_size + right_size); - new_memory->insert(new_memory->end(), left_begin, left_end); - new_memory->insert(new_memory->end(), right_begin, right_end); + new_memory->resize(left.size + right.size); + std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size); + std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset, + right.size); left.backing_block = std::move(new_memory); left.offset = 0; @@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) { memory.UnmapRegion(page_table, vma.base, vma.size); break; case VMAType::AllocatedMemoryBlock: - memory.MapMemoryRegion(page_table, vma.base, vma.size, - vma.backing_block->data() + vma.offset); + memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset); break; case VMAType::BackingMemory: memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory); diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index f1795fdd6..8908e5328 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) { codeset_segment->addr = 
segment_addr; codeset_segment->size = aligned_size; - memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz); + std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i), + p->p_filesz); current_image_position += aligned_size; } } diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 474b55cb1..092103abe 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> #include "core/file_sys/kernel_executable.h" #include "core/file_sys/program_metadata.h" #include "core/gdbstub/gdbstub.h" @@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { segment.addr = offset; segment.offset = offset; segment.size = PageAlignSize(static_cast<u32>(data.size())); - program_image.resize(offset); - program_image.insert(program_image.end(), data.begin(), data.end()); + program_image.resize(offset + data.size()); + std::memcpy(program_image.data() + offset, data.data(), data.size()); }; load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset()); diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index f629892ae..515c5accb 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include <cinttypes> +#include <cstring> #include <vector> #include "common/common_funcs.h" @@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, if (nso_header.IsSegmentCompressed(i)) { data = DecompressSegment(data, nso_header.segments[i]); } - program_image.resize(nso_header.segments[i].location); - program_image.insert(program_image.end(), data.begin(), data.end()); + program_image.resize(nso_header.segments[i].location + data.size()); + std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(), + data.size()); codeset.segments[i].addr = nso_header.segments[i].location; codeset.segments[i].offset = nso_header.segments[i].location; codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size())); @@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process, std::vector<u8> pi_header; pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header), reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader)); - pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(), - program_image.end()); + pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(), + program_image.data() + program_image.size()); pi_header = pm->PatchNSO(pi_header, file.GetName()); - std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin()); + std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data()); } // Apply cheats if they exist and the program has a valid title ID diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 3c2a29d9b..f0888327f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -14,6 +14,7 @@ #include "common/swap.h" #include "core/arm/arm_interface.h" #include "core/core.h" +#include "core/hle/kernel/physical_memory.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" @@ -38,6 +39,11 @@ struct Memory::Impl { 
system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width); } + void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, + Kernel::PhysicalMemory& memory, VAddr offset) { + MapMemoryRegion(page_table, base, size, memory.data() + offset); + } + void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); @@ -601,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) { impl->SetCurrentPageTable(process); } +void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, + Kernel::PhysicalMemory& memory, VAddr offset) { + impl->MapMemoryRegion(page_table, base, size, memory, offset); +} + void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) { impl->MapMemoryRegion(page_table, base, size, target); } diff --git a/src/core/memory.h b/src/core/memory.h index 1428a6d60..8913a9da4 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -19,8 +19,9 @@ class System; } namespace Kernel { +class PhysicalMemory; class Process; -} +} // namespace Kernel namespace Memory { @@ -66,6 +67,19 @@ public: void SetCurrentPageTable(Kernel::Process& process); /** + * Maps a physical buffer onto a region of the emulated process address space. + * + * @param page_table The page table of the emulated process. + * @param base The address to start mapping at. Must be page-aligned. + * @param size The amount of bytes to map. Must be page-aligned. + * @param memory Physical buffer with the memory backing the mapping. Must be of length + * at least `size + offset`. + * @param offset The offset within the physical memory. Must be page-aligned. 
+ */ + void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, + Kernel::PhysicalMemory& memory, VAddr offset); + + /** * Maps an allocated buffer onto a region of the emulated process address space. * * @param page_table The page table of the emulated process. diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 16f95b77d..ee79260fc 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1018,7 +1018,14 @@ public: } } instanced_arrays; - INSERT_UNION_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x4); + + union { + BitField<0, 1, u32> enable; + BitField<4, 8, u32> unk4; + } vp_point_size; + + INSERT_UNION_PADDING_WORDS(1); Cull cull; @@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); +ASSERT_REG_POSITION(vp_point_size, 0x644); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 57b57c647..6f98bd827 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 { Trunc = 11, }; +enum class AtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -236,6 +248,13 @@ enum class StoreType : u64 { Bits128 = 6, }; +enum class AtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + S64 = 3, +}; + enum class IMinMaxExchange : u64 { None = 0, XLo = 1, @@ -939,6 +958,16 @@ union Instruction { } stg; union { + BitField<52, 4, AtomicOp> operation; + BitField<28, 2, AtomicType> type; + BitField<30, 22, s64> offset; + + s32 
GetImmediateOffset() const { + return static_cast<s32>(offset << 2); + } + } atoms; + + union { BitField<32, 1, PhysicalAttributeDirection> direction; BitField<47, 3, AttributeSize> size; BitField<20, 11, u64> address; @@ -1659,9 +1688,10 @@ public: ST_A, ST_L, ST_S, - ST, // Store in generic memory - STG, // Store in global memory - AL2P, // Transforms attribute memory into physical memory + ST, // Store in generic memory + STG, // Store in global memory + ATOMS, // Atomic operation on shared memory + AL2P, // Transforms attribute memory into physical memory TEX, TEX_B, // Texture Load Bindless TXQ, // Texture Query @@ -1964,6 +1994,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 672051102..c428f06e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() { const auto& regs = system.GPU().Maxwell3D().regs; // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). 
+ state.point.program_control = regs.vp_point_size.enable != 0; state.point.size = std::max(1.0f, regs.point_size); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e9ceca768..2996aaf08 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1856,6 +1856,16 @@ private: Type::Uint}; } + template <const std::string_view& opname, Type type> + Expression Atomic(Operation operation) { + ASSERT(stage == ShaderType::Compute); + auto& smem = std::get<SmemNode>(*operation[0]); + + return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), + Visit(operation[1]).As(type)), + type}; + } + Expression Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -2194,6 +2204,8 @@ private: &GLSLDecompiler::AtomicImage<Func::Xor>, &GLSLDecompiler::AtomicImage<Func::Exchange>, + &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, + &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index df2e2395a..cc185e9e1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() { } void OpenGLState::ApplyPointSize() { + Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fb180f302..678e5cd89 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -131,7 +131,8 @@ public: std::array<Viewport, 
Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; struct { - float size = 1.0f; // GL_POINT_SIZE + bool program_control = false; // GL_PROGRAM_POINT_SIZE + GLfloat size = 1.0f; // GL_POINT_SIZE } point; struct { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b790b0ef4..e95eb069e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -44,7 +44,7 @@ struct FormatTuple { constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U - {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U @@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S + {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI - {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 @@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); const 
std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + u8* const mip_data = staging_buffer.data() + mip_offset; + const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, - static_cast<GLsizei>(params.GetHostMipmapSize(level)), - staging_buffer.data() + mip_offset); + glGetCompressedTextureImage(texture.handle, level, size, mip_data); } else { - glGetTextureImage(texture.handle, level, format, type, - static_cast<GLsizei>(params.GetHostMipmapSize(level)), - staging_buffer.data() + mip_offset); + glGetTextureImage(texture.handle, level, format, type, size, mip_data); } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8fe852ce8..0cf97cafa 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1796,6 +1796,11 @@ private: return {}; } + Expression UAtomicAdd(Operation) { + UNIMPLEMENTED(); + return {}; + } + Expression Branch(Operation operation) { const auto& target = std::get<ImmediateNode>(*operation[0]); OpStore(jmp_to, Constant(t_uint, target.GetValue())); @@ -2373,6 +2378,8 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, + &SPIRVDecompiler::UAtomicAdd, + &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::PushFlowStack, diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 8cc84e935..7591a715f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -16,6 +16,8 @@ namespace VideoCommon::Shader { +using Tegra::Shader::AtomicOp; +using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case 
OpCode::Id::ATOMS: { + UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", + static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", + static_cast<int>(instr.atoms.type.Value())); + + const s32 offset = instr.atoms.GetImmediateOffset(); + Node address = GetRegister(instr.gpr8); + address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); + + Node memory = GetSharedMemory(std::move(address)); + Node data = GetRegister(instr.gpr20); + + Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4e155542a..075c7d07c 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,6 +162,8 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void + UAtomicAdd, /// (smem, uint) -> uint + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index b21fbf826..b5dd3e0d6 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() { const QString main_window = QStringLiteral("Main Window"); const QString load_file = QStringLiteral("Load File"); + const QString load_amiibo = QStringLiteral("Load Amiibo"); const QString exit_yuzu = QStringLiteral("Exit yuzu"); + const QString restart_emulation = QStringLiteral("Restart Emulation"); const QString stop_emulation = QStringLiteral("Stop Emulation"); const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar"); const QString toggle_status_bar = QStringLiteral("Toggle Status 
Bar"); const QString fullscreen = QStringLiteral("Fullscreen"); + const QString capture_screenshot = QStringLiteral("Capture Screenshot"); ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file)); ui.action_Load_File->setShortcutContext( hotkey_registry.GetShortcutContext(main_window, load_file)); + ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo)); + ui.action_Load_Amiibo->setShortcutContext( + hotkey_registry.GetShortcutContext(main_window, load_amiibo)); + ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu)); ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu)); + ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation)); + ui.action_Restart->setShortcutContext( + hotkey_registry.GetShortcutContext(main_window, restart_emulation)); + ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation)); ui.action_Stop->setShortcutContext( hotkey_registry.GetShortcutContext(main_window, stop_emulation)); @@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() { ui.action_Show_Status_Bar->setShortcutContext( hotkey_registry.GetShortcutContext(main_window, toggle_status_bar)); + ui.action_Capture_Screenshot->setShortcut( + hotkey_registry.GetKeySequence(main_window, capture_screenshot)); + ui.action_Capture_Screenshot->setShortcutContext( + hotkey_registry.GetShortcutContext(main_window, capture_screenshot)); + connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this), &QShortcut::activated, this, &GMainWindow::OnMenuLoadFile); connect( diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index 21f422500..a2c9e4547 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -15,7 +15,7 @@ </property> <property name="windowIcon"> <iconset> - <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset> + 
<normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset> </property> <property name="tabShape"> <enum>QTabWidget::Rounded</enum> @@ -98,6 +98,7 @@ <addaction name="action_Display_Dock_Widget_Headers"/> <addaction name="action_Show_Filter_Bar"/> <addaction name="action_Show_Status_Bar"/> + <addaction name="separator"/> <addaction name="menu_View_Debugging"/> </widget> <widget class="QMenu" name="menu_Tools"> |