summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/telemetry.cpp15
-rw-r--r--src/common/x64/cpu_detect.cpp68
-rw-r--r--src/common/x64/cpu_detect.h31
-rw-r--r--src/core/hle/kernel/physical_memory.h5
-rw-r--r--src/core/hle/kernel/vm_manager.cpp37
-rw-r--r--src/core/loader/elf.cpp3
-rw-r--r--src/core/loader/kip.cpp5
-rw-r--r--src/core/loader/nso.cpp12
-rw-r--r--src/core/memory.cpp11
-rw-r--r--src/core/memory.h16
-rw-r--r--src/video_core/engines/maxwell_3d.h10
-rw-r--r--src/video_core/engines/shader_bytecode.h37
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_state.h3
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp7
-rw-r--r--src/video_core/shader/decode/memory.cpp19
-rw-r--r--src/video_core/shader/node.h2
-rw-r--r--src/yuzu/main.cpp16
-rw-r--r--src/yuzu/main.ui3
22 files changed, 166 insertions, 164 deletions
diff --git a/src/common/telemetry.cpp b/src/common/telemetry.cpp
index f53a8d193..200c6489a 100644
--- a/src/common/telemetry.cpp
+++ b/src/common/telemetry.cpp
@@ -44,20 +44,6 @@ template class Field<std::string>;
template class Field<const char*>;
template class Field<std::chrono::microseconds>;
-#ifdef ARCHITECTURE_x86_64
-static const char* CpuVendorToStr(Common::CPUVendor vendor) {
- switch (vendor) {
- case Common::CPUVendor::INTEL:
- return "Intel";
- case Common::CPUVendor::AMD:
- return "Amd";
- case Common::CPUVendor::OTHER:
- return "Other";
- }
- UNREACHABLE();
-}
-#endif
-
void AppendBuildInfo(FieldCollection& fc) {
const bool is_git_dirty{std::strstr(Common::g_scm_desc, "dirty") != nullptr};
fc.AddField(FieldType::App, "Git_IsDirty", is_git_dirty);
@@ -71,7 +57,6 @@ void AppendCPUInfo(FieldCollection& fc) {
#ifdef ARCHITECTURE_x86_64
fc.AddField(FieldType::UserSystem, "CPU_Model", Common::GetCPUCaps().cpu_string);
fc.AddField(FieldType::UserSystem, "CPU_BrandString", Common::GetCPUCaps().brand_string);
- fc.AddField(FieldType::UserSystem, "CPU_Vendor", CpuVendorToStr(Common::GetCPUCaps().vendor));
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AES", Common::GetCPUCaps().aes);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX", Common::GetCPUCaps().avx);
fc.AddField(FieldType::UserSystem, "CPU_Extension_x64_AVX2", Common::GetCPUCaps().avx2);
diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp
index 2dfcd39c8..c9349a6b4 100644
--- a/src/common/x64/cpu_detect.cpp
+++ b/src/common/x64/cpu_detect.cpp
@@ -3,8 +3,6 @@
// Refer to the license.txt file included.
#include <cstring>
-#include <string>
-#include <thread>
#include "common/common_types.h"
#include "common/x64/cpu_detect.h"
@@ -51,8 +49,6 @@ namespace Common {
static CPUCaps Detect() {
CPUCaps caps = {};
- caps.num_cores = std::thread::hardware_concurrency();
-
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
// yuzu at all anyway
@@ -70,12 +66,6 @@ static CPUCaps Detect() {
__cpuid(cpu_id, 0x80000000);
u32 max_ex_fn = cpu_id[0];
- if (!strcmp(caps.brand_string, "GenuineIntel"))
- caps.vendor = CPUVendor::INTEL;
- else if (!strcmp(caps.brand_string, "AuthenticAMD"))
- caps.vendor = CPUVendor::AMD;
- else
- caps.vendor = CPUVendor::OTHER;
// Set reasonable default brand string even if brand string not available
strcpy(caps.cpu_string, caps.brand_string);
@@ -96,15 +86,9 @@ static CPUCaps Detect() {
caps.sse4_1 = true;
if ((cpu_id[2] >> 20) & 1)
caps.sse4_2 = true;
- if ((cpu_id[2] >> 22) & 1)
- caps.movbe = true;
if ((cpu_id[2] >> 25) & 1)
caps.aes = true;
- if ((cpu_id[3] >> 24) & 1) {
- caps.fxsave_fxrstor = true;
- }
-
// AVX support requires 3 separate checks:
// - Is the AVX bit set in CPUID?
// - Is the XSAVE bit set in CPUID?
@@ -129,8 +113,6 @@ static CPUCaps Detect() {
}
}
- caps.flush_to_zero = caps.sse;
-
if (max_ex_fn >= 0x80000004) {
// Extract CPU model string
__cpuid(cpu_id, 0x80000002);
@@ -144,14 +126,8 @@ static CPUCaps Detect() {
if (max_ex_fn >= 0x80000001) {
// Check for more features
__cpuid(cpu_id, 0x80000001);
- if (cpu_id[2] & 1)
- caps.lahf_sahf_64 = true;
- if ((cpu_id[2] >> 5) & 1)
- caps.lzcnt = true;
if ((cpu_id[2] >> 16) & 1)
caps.fma4 = true;
- if ((cpu_id[3] >> 29) & 1)
- caps.long_mode = true;
}
return caps;
@@ -162,48 +138,4 @@ const CPUCaps& GetCPUCaps() {
return caps;
}
-std::string GetCPUCapsString() {
- auto caps = GetCPUCaps();
-
- std::string sum(caps.cpu_string);
- sum += " (";
- sum += caps.brand_string;
- sum += ")";
-
- if (caps.sse)
- sum += ", SSE";
- if (caps.sse2) {
- sum += ", SSE2";
- if (!caps.flush_to_zero)
- sum += " (without DAZ)";
- }
-
- if (caps.sse3)
- sum += ", SSE3";
- if (caps.ssse3)
- sum += ", SSSE3";
- if (caps.sse4_1)
- sum += ", SSE4.1";
- if (caps.sse4_2)
- sum += ", SSE4.2";
- if (caps.avx)
- sum += ", AVX";
- if (caps.avx2)
- sum += ", AVX2";
- if (caps.bmi1)
- sum += ", BMI1";
- if (caps.bmi2)
- sum += ", BMI2";
- if (caps.fma)
- sum += ", FMA";
- if (caps.aes)
- sum += ", AES";
- if (caps.movbe)
- sum += ", MOVBE";
- if (caps.long_mode)
- sum += ", 64-bit support";
-
- return sum;
-}
-
} // namespace Common
diff --git a/src/common/x64/cpu_detect.h b/src/common/x64/cpu_detect.h
index 0af3a8adb..20f2ba234 100644
--- a/src/common/x64/cpu_detect.h
+++ b/src/common/x64/cpu_detect.h
@@ -4,23 +4,12 @@
#pragma once
-#include <string>
-
namespace Common {
-/// x86/x64 CPU vendors that may be detected by this module
-enum class CPUVendor {
- INTEL,
- AMD,
- OTHER,
-};
-
/// x86/x64 CPU capabilities that may be detected by this module
struct CPUCaps {
- CPUVendor vendor;
char cpu_string[0x21];
char brand_string[0x41];
- int num_cores;
bool sse;
bool sse2;
bool sse3;
@@ -35,20 +24,6 @@ struct CPUCaps {
bool fma;
bool fma4;
bool aes;
-
- // Support for the FXSAVE and FXRSTOR instructions
- bool fxsave_fxrstor;
-
- bool movbe;
-
- // This flag indicates that the hardware supports some mode in which denormal inputs and outputs
- // are automatically set to (signed) zero.
- bool flush_to_zero;
-
- // Support for LAHF and SAHF instructions in 64-bit mode
- bool lahf_sahf_64;
-
- bool long_mode;
};
/**
@@ -57,10 +32,4 @@ struct CPUCaps {
*/
const CPUCaps& GetCPUCaps();
-/**
- * Gets a string summary of the name and supported capabilities of the host CPU
- * @return String summary
- */
-std::string GetCPUCapsString();
-
} // namespace Common
diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h
index 090565310..b689e8e8b 100644
--- a/src/core/hle/kernel/physical_memory.h
+++ b/src/core/hle/kernel/physical_memory.h
@@ -14,6 +14,9 @@ namespace Kernel {
// - Second to ensure all host backing memory used is aligned to 256 bytes due
// to strict alignment restrictions on GPU memory.
-using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+using PhysicalMemoryVector = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
+class PhysicalMemory final : public PhysicalMemoryVector {
+ using PhysicalMemoryVector::PhysicalMemoryVector;
+};
} // namespace Kernel
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index a9a20ef76..0b3500fce 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <algorithm>
+#include <cstring>
#include <iterator>
#include <utility>
#include "common/alignment.h"
@@ -269,18 +270,9 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
// If necessary, expand backing vector to cover new heap extents in
// the case of allocating. Otherwise, shrink the backing memory,
// if a smaller heap has been requested.
- const u64 old_heap_size = GetCurrentHeapSize();
- if (size > old_heap_size) {
- const u64 alloc_size = size - old_heap_size;
-
- heap_memory->insert(heap_memory->end(), alloc_size, 0);
- RefreshMemoryBlockMappings(heap_memory.get());
- } else if (size < old_heap_size) {
- heap_memory->resize(size);
- heap_memory->shrink_to_fit();
-
- RefreshMemoryBlockMappings(heap_memory.get());
- }
+ heap_memory->resize(size);
+ heap_memory->shrink_to_fit();
+ RefreshMemoryBlockMappings(heap_memory.get());
heap_end = heap_region_base + size;
ASSERT(GetCurrentHeapSize() == heap_memory->size());
@@ -752,24 +744,20 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
// Always merge allocated memory blocks, even when they don't share the same backing block.
if (left.type == VMAType::AllocatedMemoryBlock &&
(left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
- const auto right_begin = right.backing_block->begin() + right.offset;
- const auto right_end = right_begin + right.size;
// Check if we can save work.
if (left.offset == 0 && left.size == left.backing_block->size()) {
// Fast case: left is an entire backing block.
- left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
+ left.backing_block->resize(left.size + right.size);
+ std::memcpy(left.backing_block->data() + left.size,
+ right.backing_block->data() + right.offset, right.size);
} else {
// Slow case: make a new memory block for left and right.
- const auto left_begin = left.backing_block->begin() + left.offset;
- const auto left_end = left_begin + left.size;
- const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
- const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
-
auto new_memory = std::make_shared<PhysicalMemory>();
- new_memory->reserve(left_size + right_size);
- new_memory->insert(new_memory->end(), left_begin, left_end);
- new_memory->insert(new_memory->end(), right_begin, right_end);
+ new_memory->resize(left.size + right.size);
+ std::memcpy(new_memory->data(), left.backing_block->data() + left.offset, left.size);
+ std::memcpy(new_memory->data() + left.size, right.backing_block->data() + right.offset,
+ right.size);
left.backing_block = std::move(new_memory);
left.offset = 0;
@@ -792,8 +780,7 @@ void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
memory.UnmapRegion(page_table, vma.base, vma.size);
break;
case VMAType::AllocatedMemoryBlock:
- memory.MapMemoryRegion(page_table, vma.base, vma.size,
- vma.backing_block->data() + vma.offset);
+ memory.MapMemoryRegion(page_table, vma.base, vma.size, *vma.backing_block, vma.offset);
break;
case VMAType::BackingMemory:
memory.MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index f1795fdd6..8908e5328 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -335,7 +335,8 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
codeset_segment->addr = segment_addr;
codeset_segment->size = aligned_size;
- memcpy(&program_image[current_image_position], GetSegmentPtr(i), p->p_filesz);
+ std::memcpy(program_image.data() + current_image_position, GetSegmentPtr(i),
+ p->p_filesz);
current_image_position += aligned_size;
}
}
diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp
index 474b55cb1..092103abe 100644
--- a/src/core/loader/kip.cpp
+++ b/src/core/loader/kip.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
#include "core/file_sys/kernel_executable.h"
#include "core/file_sys/program_metadata.h"
#include "core/gdbstub/gdbstub.h"
@@ -76,8 +77,8 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
segment.addr = offset;
segment.offset = offset;
segment.size = PageAlignSize(static_cast<u32>(data.size()));
- program_image.resize(offset);
- program_image.insert(program_image.end(), data.begin(), data.end());
+ program_image.resize(offset + data.size());
+ std::memcpy(program_image.data() + offset, data.data(), data.size());
};
load_segment(codeset.CodeSegment(), kip->GetTextSection(), kip->GetTextOffset());
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index f629892ae..515c5accb 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <cinttypes>
+#include <cstring>
#include <vector>
#include "common/common_funcs.h"
@@ -96,8 +97,9 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
if (nso_header.IsSegmentCompressed(i)) {
data = DecompressSegment(data, nso_header.segments[i]);
}
- program_image.resize(nso_header.segments[i].location);
- program_image.insert(program_image.end(), data.begin(), data.end());
+ program_image.resize(nso_header.segments[i].location + data.size());
+ std::memcpy(program_image.data() + nso_header.segments[i].location, data.data(),
+ data.size());
codeset.segments[i].addr = nso_header.segments[i].location;
codeset.segments[i].offset = nso_header.segments[i].location;
codeset.segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
@@ -139,12 +141,12 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
std::vector<u8> pi_header;
pi_header.insert(pi_header.begin(), reinterpret_cast<u8*>(&nso_header),
reinterpret_cast<u8*>(&nso_header) + sizeof(NSOHeader));
- pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.begin(),
- program_image.end());
+ pi_header.insert(pi_header.begin() + sizeof(NSOHeader), program_image.data(),
+ program_image.data() + program_image.size());
pi_header = pm->PatchNSO(pi_header, file.GetName());
- std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.begin());
+ std::copy(pi_header.begin() + sizeof(NSOHeader), pi_header.end(), program_image.data());
}
// Apply cheats if they exist and the program has a valid title ID
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3c2a29d9b..f0888327f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -14,6 +14,7 @@
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
+#include "core/hle/kernel/physical_memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
@@ -38,6 +39,11 @@ struct Memory::Impl {
system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
}
+ void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+ Kernel::PhysicalMemory& memory, VAddr offset) {
+ MapMemoryRegion(page_table, base, size, memory.data() + offset);
+ }
+
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
@@ -601,6 +607,11 @@ void Memory::SetCurrentPageTable(Kernel::Process& process) {
impl->SetCurrentPageTable(process);
}
+void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+ Kernel::PhysicalMemory& memory, VAddr offset) {
+ impl->MapMemoryRegion(page_table, base, size, memory, offset);
+}
+
void Memory::MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
impl->MapMemoryRegion(page_table, base, size, target);
}
diff --git a/src/core/memory.h b/src/core/memory.h
index 1428a6d60..8913a9da4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -19,8 +19,9 @@ class System;
}
namespace Kernel {
+class PhysicalMemory;
class Process;
-}
+} // namespace Kernel
namespace Memory {
@@ -66,6 +67,19 @@ public:
void SetCurrentPageTable(Kernel::Process& process);
/**
+ * Maps an physical buffer onto a region of the emulated process address space.
+ *
+ * @param page_table The page table of the emulated process.
+ * @param base The address to start mapping at. Must be page-aligned.
+ * @param size The amount of bytes to map. Must be page-aligned.
+ * @param memory Physical buffer with the memory backing the mapping. Must be of length
+ * at least `size + offset`.
+ * @param offset The offset within the physical memory. Must be page-aligned.
+ */
+ void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size,
+ Kernel::PhysicalMemory& memory, VAddr offset);
+
+ /**
* Maps an allocated buffer onto a region of the emulated process address space.
*
* @param page_table The page table of the emulated process.
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 16f95b77d..ee79260fc 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1018,7 +1018,14 @@ public:
}
} instanced_arrays;
- INSERT_UNION_PADDING_WORDS(0x6);
+ INSERT_UNION_PADDING_WORDS(0x4);
+
+ union {
+ BitField<0, 1, u32> enable;
+ BitField<4, 8, u32> unk4;
+ } vp_point_size;
+
+ INSERT_UNION_PADDING_WORDS(1);
Cull cull;
@@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F);
ASSERT_REG_POSITION(instanced_arrays, 0x620);
+ASSERT_REG_POSITION(vp_point_size, 0x644);
ASSERT_REG_POSITION(cull, 0x646);
ASSERT_REG_POSITION(pixel_center_integer, 0x649);
ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 57b57c647..6f98bd827 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
Trunc = 11,
};
+enum class AtomicOp : u64 {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8,
+};
+
enum class UniformType : u64 {
UnsignedByte = 0,
SignedByte = 1,
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
Bits128 = 6,
};
+enum class AtomicType : u64 {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ S64 = 3,
+};
+
enum class IMinMaxExchange : u64 {
None = 0,
XLo = 1,
@@ -939,6 +958,16 @@ union Instruction {
} stg;
union {
+ BitField<52, 4, AtomicOp> operation;
+ BitField<28, 2, AtomicType> type;
+ BitField<30, 22, s64> offset;
+
+ s32 GetImmediateOffset() const {
+ return static_cast<s32>(offset << 2);
+ }
+ } atoms;
+
+ union {
BitField<32, 1, PhysicalAttributeDirection> direction;
BitField<47, 3, AttributeSize> size;
BitField<20, 11, u64> address;
@@ -1659,9 +1688,10 @@ public:
ST_A,
ST_L,
ST_S,
- ST, // Store in generic memory
- STG, // Store in global memory
- AL2P, // Transforms attribute memory into physical memory
+ ST, // Store in generic memory
+ STG, // Store in global memory
+ ATOMS, // Atomic operation on shared memory
+ AL2P, // Transforms attribute memory into physical memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
@@ -1964,6 +1994,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
+ INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 672051102..c428f06e4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() {
const auto& regs = system.GPU().Maxwell3D().regs;
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
+ state.point.program_control = regs.vp_point_size.enable != 0;
state.point.size = std::max(1.0f, regs.point_size);
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e9ceca768..2996aaf08 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1856,6 +1856,16 @@ private:
Type::Uint};
}
+ template <const std::string_view& opname, Type type>
+ Expression Atomic(Operation operation) {
+ ASSERT(stage == ShaderType::Compute);
+ auto& smem = std::get<SmemNode>(*operation[0]);
+
+ return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
+ Visit(operation[1]).As(type)),
+ type};
+ }
+
Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -2194,6 +2204,8 @@ private:
&GLSLDecompiler::AtomicImage<Func::Xor>,
&GLSLDecompiler::AtomicImage<Func::Exchange>,
+ &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
+
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack,
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index df2e2395a..cc185e9e1 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() {
}
void OpenGLState::ApplyPointSize() {
+ Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
if (UpdateValue(cur_state.point.size, point.size)) {
glPointSize(point.size);
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fb180f302..678e5cd89 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -131,7 +131,8 @@ public:
std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
struct {
- float size = 1.0f; // GL_POINT_SIZE
+ bool program_control = false; // GL_PROGRAM_POINT_SIZE
+ GLfloat size = 1.0f; // GL_POINT_SIZE
} point;
struct {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b790b0ef4..e95eb069e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -44,7 +44,7 @@ struct FormatTuple {
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U
- {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S
+ {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U
@@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U
- {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S
+ {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
- {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F
+ {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
@@ -253,14 +253,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
+ u8* const mip_data = staging_buffer.data() + mip_offset;
+ const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level));
if (is_compressed) {
- glGetCompressedTextureImage(texture.handle, level,
- static_cast<GLsizei>(params.GetHostMipmapSize(level)),
- staging_buffer.data() + mip_offset);
+ glGetCompressedTextureImage(texture.handle, level, size, mip_data);
} else {
- glGetTextureImage(texture.handle, level, format, type,
- static_cast<GLsizei>(params.GetHostMipmapSize(level)),
- staging_buffer.data() + mip_offset);
+ glGetTextureImage(texture.handle, level, format, type, size, mip_data);
}
}
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 8fe852ce8..0cf97cafa 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1796,6 +1796,11 @@ private:
return {};
}
+ Expression UAtomicAdd(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Expression Branch(Operation operation) {
const auto& target = std::get<ImmediateNode>(*operation[0]);
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
@@ -2373,6 +2378,8 @@ private:
&SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange,
+ &SPIRVDecompiler::UAtomicAdd,
+
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
&SPIRVDecompiler::PushFlowStack,
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 8cc84e935..7591a715f 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -16,6 +16,8 @@
namespace VideoCommon::Shader {
+using Tegra::Shader::AtomicOp;
+using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
@@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::ATOMS: {
+ UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
+ static_cast<int>(instr.atoms.operation.Value()));
+ UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
+ static_cast<int>(instr.atoms.type.Value()));
+
+ const s32 offset = instr.atoms.GetImmediateOffset();
+ Node address = GetRegister(instr.gpr8);
+ address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset));
+
+ Node memory = GetSharedMemory(std::move(address));
+ Node data = GetRegister(instr.gpr20);
+
+ Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
+ SetRegister(bb, instr.gpr0, std::move(value));
+ break;
+ }
case OpCode::Id::AL2P: {
// Ignore al2p.direction since we don't care about it.
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 4e155542a..075c7d07c 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,6 +162,8 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
+ UAtomicAdd, /// (smem, uint) -> uint
+
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index b21fbf826..b5dd3e0d6 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -526,19 +526,30 @@ void GMainWindow::InitializeHotkeys() {
const QString main_window = QStringLiteral("Main Window");
const QString load_file = QStringLiteral("Load File");
+ const QString load_amiibo = QStringLiteral("Load Amiibo");
const QString exit_yuzu = QStringLiteral("Exit yuzu");
+ const QString restart_emulation = QStringLiteral("Restart Emulation");
const QString stop_emulation = QStringLiteral("Stop Emulation");
const QString toggle_filter_bar = QStringLiteral("Toggle Filter Bar");
const QString toggle_status_bar = QStringLiteral("Toggle Status Bar");
const QString fullscreen = QStringLiteral("Fullscreen");
+ const QString capture_screenshot = QStringLiteral("Capture Screenshot");
ui.action_Load_File->setShortcut(hotkey_registry.GetKeySequence(main_window, load_file));
ui.action_Load_File->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, load_file));
+ ui.action_Load_Amiibo->setShortcut(hotkey_registry.GetKeySequence(main_window, load_amiibo));
+ ui.action_Load_Amiibo->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, load_amiibo));
+
ui.action_Exit->setShortcut(hotkey_registry.GetKeySequence(main_window, exit_yuzu));
ui.action_Exit->setShortcutContext(hotkey_registry.GetShortcutContext(main_window, exit_yuzu));
+ ui.action_Restart->setShortcut(hotkey_registry.GetKeySequence(main_window, restart_emulation));
+ ui.action_Restart->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, restart_emulation));
+
ui.action_Stop->setShortcut(hotkey_registry.GetKeySequence(main_window, stop_emulation));
ui.action_Stop->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, stop_emulation));
@@ -553,6 +564,11 @@ void GMainWindow::InitializeHotkeys() {
ui.action_Show_Status_Bar->setShortcutContext(
hotkey_registry.GetShortcutContext(main_window, toggle_status_bar));
+ ui.action_Capture_Screenshot->setShortcut(
+ hotkey_registry.GetKeySequence(main_window, capture_screenshot));
+ ui.action_Capture_Screenshot->setShortcutContext(
+ hotkey_registry.GetShortcutContext(main_window, capture_screenshot));
+
connect(hotkey_registry.GetHotkey(main_window, QStringLiteral("Load File"), this),
&QShortcut::activated, this, &GMainWindow::OnMenuLoadFile);
connect(
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 21f422500..a2c9e4547 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -15,7 +15,7 @@
</property>
<property name="windowIcon">
<iconset>
- <normaloff>src/pcafe/res/icon3_64x64.ico</normaloff>src/pcafe/res/icon3_64x64.ico</iconset>
+ <normaloff>../dist/yuzu.ico</normaloff>../dist/yuzu.ico</iconset>
</property>
<property name="tabShape">
<enum>QTabWidget::Rounded</enum>
@@ -98,6 +98,7 @@
<addaction name="action_Display_Dock_Widget_Headers"/>
<addaction name="action_Show_Filter_Bar"/>
<addaction name="action_Show_Status_Bar"/>
+ <addaction name="separator"/>
<addaction name="menu_View_Debugging"/>
</widget>
<widget class="QMenu" name="menu_Tools">