summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--.travis.yml4
-rwxr-xr-x.travis/linux/build.sh2
-rwxr-xr-x.travis/linux/docker.sh16
-rwxr-xr-x.travis/macos/build.sh4
-rwxr-xr-x.travis/macos/deps.sh2
-rw-r--r--src/common/logging/backend.cpp1
-rw-r--r--src/common/logging/log.h1
-rw-r--r--src/common/string_util.cpp4
-rw-r--r--src/common/string_util.h2
-rw-r--r--src/core/CMakeLists.txt6
-rw-r--r--src/core/arm/unicorn/arm_unicorn.cpp38
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h4
-rw-r--r--src/core/file_sys/partition_filesystem.cpp24
-rw-r--r--src/core/file_sys/partition_filesystem.h8
-rw-r--r--src/core/gdbstub/gdbstub.cpp170
-rw-r--r--src/core/gdbstub/gdbstub.h9
-rw-r--r--src/core/hle/kernel/address_arbiter.cpp173
-rw-r--r--src/core/hle/kernel/address_arbiter.h32
-rw-r--r--src/core/hle/kernel/errors.h12
-rw-r--r--src/core/hle/kernel/hle_ipc.cpp5
-rw-r--r--src/core/hle/kernel/mutex.cpp4
-rw-r--r--src/core/hle/kernel/svc.cpp68
-rw-r--r--src/core/hle/kernel/svc_wrap.h14
-rw-r--r--src/core/hle/kernel/thread.cpp6
-rw-r--r--src/core/hle/kernel/thread.h4
-rw-r--r--src/core/hle/service/am/am.cpp13
-rw-r--r--src/core/hle/service/am/am.h1
-rw-r--r--src/core/hle/service/audio/audren_u.cpp105
-rw-r--r--src/core/hle/service/audio/audren_u.h6
-rw-r--r--src/core/hle/service/filesystem/fsp_srv.cpp31
-rw-r--r--src/core/hle/service/hid/hid.cpp7
-rw-r--r--src/core/hle/service/hid/hid.h2
-rw-r--r--src/core/hle/service/mm/mm_u.cpp50
-rw-r--r--src/core/hle/service/mm/mm_u.h29
-rw-r--r--src/core/hle/service/nfp/nfp.cpp108
-rw-r--r--src/core/hle/service/nifm/nifm.cpp11
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp22
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h31
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp3
-rw-r--r--src/core/hle/service/service.cpp2
-rw-r--r--src/core/hle/service/set/set.cpp5
-rw-r--r--src/core/loader/loader.cpp10
-rw-r--r--src/core/loader/loader.h1
-rw-r--r--src/core/loader/nca.cpp303
-rw-r--r--src/core/loader/nca.h49
-rw-r--r--src/core/loader/nso.cpp79
-rw-r--r--src/core/loader/nso.h3
-rw-r--r--src/core/memory.cpp4
-rw-r--r--src/core/memory.h7
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/command_processor.cpp6
-rw-r--r--src/video_core/engines/fermi_2d.cpp1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp39
-rw-r--r--src/video_core/engines/maxwell_3d.h34
-rw-r--r--src/video_core/engines/maxwell_dma.cpp69
-rw-r--r--src/video_core/engines/maxwell_dma.h155
-rw-r--r--src/video_core/engines/shader_bytecode.h181
-rw-r--r--src/video_core/gpu.cpp6
-rw-r--r--src/video_core/gpu.h5
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp167
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h24
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp139
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp581
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h58
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h1
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_state.h6
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h23
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp1
-rw-r--r--src/video_core/textures/astc.cpp1646
-rw-r--r--src/video_core/textures/astc.h15
-rw-r--r--src/video_core/textures/decoders.cpp6
-rw-r--r--src/video_core/textures/texture.h16
-rw-r--r--src/yuzu/CMakeLists.txt3
-rw-r--r--src/yuzu/debugger/registers.cpp190
-rw-r--r--src/yuzu/debugger/registers.h42
-rw-r--r--src/yuzu/debugger/registers.ui40
-rw-r--r--src/yuzu/debugger/wait_tree.cpp4
-rw-r--r--src/yuzu/game_list.cpp32
-rw-r--r--src/yuzu/main.cpp31
-rw-r--r--src/yuzu/main.h3
-rw-r--r--src/yuzu/main.ui6
85 files changed, 4279 insertions, 832 deletions
diff --git a/.travis.yml b/.travis.yml
index 6740ea2cf..c225d4a2c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,3 +42,7 @@ notifications:
webhooks:
urls:
- https://api.yuzu-emu.org/code/travis/notify
+
+cache:
+ directories:
+ - $HOME/.ccache
diff --git a/.travis/linux/build.sh b/.travis/linux/build.sh
index bea898171..213feb25b 100755
--- a/.travis/linux/build.sh
+++ b/.travis/linux/build.sh
@@ -1,3 +1,3 @@
#!/bin/bash -ex
-docker run -v $(pwd):/yuzu ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
+docker run -e CCACHE_DIR=/ccache -v $HOME/.ccache:/ccache -v $(pwd):/yuzu ubuntu:18.04 /bin/bash /yuzu/.travis/linux/docker.sh
diff --git a/.travis/linux/docker.sh b/.travis/linux/docker.sh
index 8a23b54b1..376ad28dd 100755
--- a/.travis/linux/docker.sh
+++ b/.travis/linux/docker.sh
@@ -1,16 +1,18 @@
#!/bin/bash -ex
apt-get update
-apt-get install -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev wget
-
-# Get a recent version of CMake
-wget https://cmake.org/files/v3.10/cmake-3.10.1-Linux-x86_64.sh
-sh cmake-3.10.1-Linux-x86_64.sh --exclude-subdir --prefix=/ --skip-license
+apt-get install --no-install-recommends -y build-essential git libqt5opengl5-dev libsdl2-dev libssl-dev python qtbase5-dev wget cmake ninja-build ccache
cd /yuzu
+export PATH=/usr/lib/ccache:$PATH
+ln -sf /usr/bin/ccache /usr/lib/ccache/cc
+ln -sf /usr/bin/ccache /usr/lib/ccache/c++
mkdir build && cd build
-cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release
-make -j4
+ccache --show-stats > ccache_before
+cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release -G Ninja
+ninja
+ccache --show-stats > ccache_after
+diff -U100 ccache_before ccache_after || true
ctest -VV -C Release
diff --git a/.travis/macos/build.sh b/.travis/macos/build.sh
index f633f618f..5816b1d6e 100755
--- a/.travis/macos/build.sh
+++ b/.travis/macos/build.sh
@@ -7,8 +7,12 @@ export Qt5_DIR=$(brew --prefix)/opt/qt5
export UNICORNDIR=$(pwd)/externals/unicorn
mkdir build && cd build
+export PATH=/usr/local/opt/ccache/libexec:$PATH
+ccache --show-stats > ccache_before
cmake --version
cmake .. -DYUZU_BUILD_UNICORN=ON -DCMAKE_BUILD_TYPE=Release
make -j4
+ccache --show-stats > ccache_after
+diff -U100 ccache_before ccache_after || true
ctest -VV -C Release
diff --git a/.travis/macos/deps.sh b/.travis/macos/deps.sh
index 3cd13a39b..1a547c060 100755
--- a/.travis/macos/deps.sh
+++ b/.travis/macos/deps.sh
@@ -1,5 +1,5 @@
#!/bin/sh -ex
brew update
-brew install dylibbundler p7zip qt5 sdl2
+brew install dylibbundler p7zip qt5 sdl2 ccache
brew outdated cmake || brew upgrade cmake
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 3e31a74f2..c26b20062 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -41,6 +41,7 @@ namespace Log {
SUB(Service, FS) \
SUB(Service, HID) \
SUB(Service, LM) \
+ SUB(Service, MM) \
SUB(Service, NFP) \
SUB(Service, NIFM) \
SUB(Service, NS) \
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index 43e572ebe..c5015531c 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -61,6 +61,7 @@ enum class Class : ClassType {
Service_FS, ///< The FS (Filesystem) service
Service_HID, ///< The HID (Human interface device) service
Service_LM, ///< The LM (Logger) service
+ Service_MM, ///< The MM (Multimedia) service
Service_NFP, ///< The NFP service
Service_NIFM, ///< The NIFM (Network interface) service
Service_NS, ///< The NS services
diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp
index 1d952874d..646400db0 100644
--- a/src/common/string_util.cpp
+++ b/src/common/string_util.cpp
@@ -64,6 +64,10 @@ std::string ArrayToString(const u8* data, size_t size, int line_len, bool spaces
return oss.str();
}
+std::string StringFromBuffer(const std::vector<u8>& data) {
+ return std::string(data.begin(), std::find(data.begin(), data.end(), '\0'));
+}
+
// Turns " hej " into "hej". Also handles tabs.
std::string StripSpaces(const std::string& str) {
const size_t s = str.find_first_not_of(" \t\r\n");
diff --git a/src/common/string_util.h b/src/common/string_util.h
index 65e4ea5d3..1f5a383cb 100644
--- a/src/common/string_util.h
+++ b/src/common/string_util.h
@@ -21,6 +21,8 @@ std::string ToUpper(std::string str);
std::string ArrayToString(const u8* data, size_t size, int line_len = 20, bool spaces = true);
+std::string StringFromBuffer(const std::vector<u8>& data);
+
std::string StripSpaces(const std::string& s);
std::string StripQuotes(const std::string& s);
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index aff1d2180..51e4088d2 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -40,6 +40,8 @@ add_library(core STATIC
hle/config_mem.h
hle/ipc.h
hle/ipc_helpers.h
+ hle/kernel/address_arbiter.cpp
+ hle/kernel/address_arbiter.h
hle/kernel/client_port.cpp
hle/kernel/client_port.h
hle/kernel/client_session.cpp
@@ -148,6 +150,8 @@ add_library(core STATIC
hle/service/hid/hid.h
hle/service/lm/lm.cpp
hle/service/lm/lm.h
+ hle/service/mm/mm_u.cpp
+ hle/service/mm/mm_u.h
hle/service/nifm/nifm.cpp
hle/service/nifm/nifm.h
hle/service/nifm/nifm_a.cpp
@@ -255,6 +259,8 @@ add_library(core STATIC
loader/linker.h
loader/loader.cpp
loader/loader.h
+ loader/nca.cpp
+ loader/nca.h
loader/nro.cpp
loader/nro.h
loader/nso.cpp
diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index c0cc62f03..ce6c5616d 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -35,6 +35,17 @@ LoadDll LoadDll::g_load_dll;
} \
} while (0)
+static void CodeHook(uc_engine* uc, uint64_t address, uint32_t size, void* user_data) {
+ GDBStub::BreakpointAddress bkpt =
+ GDBStub::GetNextBreakpointFromAddress(address, GDBStub::BreakpointType::Execute);
+ if (GDBStub::IsMemoryBreak() ||
+ (bkpt.type != GDBStub::BreakpointType::None && address == bkpt.address)) {
+ auto core = static_cast<ARM_Unicorn*>(user_data);
+ core->RecordBreak(bkpt);
+ uc_emu_stop(uc);
+ }
+}
+
static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
u32 esr{};
CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
@@ -67,6 +78,10 @@ ARM_Unicorn::ARM_Unicorn() {
uc_hook hook{};
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_INTR, (void*)InterruptHook, this, 0, -1));
CHECKED(uc_hook_add(uc, &hook, UC_HOOK_MEM_INVALID, (void*)UnmappedMemoryHook, this, 0, -1));
+ if (GDBStub::IsServerEnabled()) {
+ CHECKED(uc_hook_add(uc, &hook, UC_HOOK_CODE, (void*)CodeHook, this, 0, -1));
+ last_bkpt_hit = false;
+ }
}
ARM_Unicorn::~ARM_Unicorn() {
@@ -155,7 +170,11 @@ void ARM_Unicorn::SetTlsAddress(VAddr base) {
}
void ARM_Unicorn::Run() {
- ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
+ if (GDBStub::IsServerEnabled()) {
+ ExecuteInstructions(std::max(4000000, 0));
+ } else {
+ ExecuteInstructions(std::max(CoreTiming::GetDowncount(), 0));
+ }
}
void ARM_Unicorn::Step() {
@@ -168,6 +187,18 @@ void ARM_Unicorn::ExecuteInstructions(int num_instructions) {
MICROPROFILE_SCOPE(ARM_Jit);
CHECKED(uc_emu_start(uc, GetPC(), 1ULL << 63, 0, num_instructions));
CoreTiming::AddTicks(num_instructions);
+ if (GDBStub::IsServerEnabled()) {
+ if (last_bkpt_hit) {
+ uc_reg_write(uc, UC_ARM64_REG_PC, &last_bkpt.address);
+ }
+ Kernel::Thread* thread = Kernel::GetCurrentThread();
+ SaveContext(thread->context);
+ if (last_bkpt_hit) {
+ last_bkpt_hit = false;
+ GDBStub::Break();
+ }
+ GDBStub::SendTrap(thread, 5);
+ }
}
void ARM_Unicorn::SaveContext(ARM_Interface::ThreadContext& ctx) {
@@ -233,3 +264,8 @@ void ARM_Unicorn::PrepareReschedule() {
}
void ARM_Unicorn::ClearInstructionCache() {}
+
+void ARM_Unicorn::RecordBreak(GDBStub::BreakpointAddress bkpt) {
+ last_bkpt = bkpt;
+ last_bkpt_hit = true;
+}
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index b99b58e4c..a482a2aa3 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -7,6 +7,7 @@
#include <unicorn/unicorn.h>
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
+#include "core/gdbstub/gdbstub.h"
class ARM_Unicorn final : public ARM_Interface {
public:
@@ -35,7 +36,10 @@ public:
void Step() override;
void ClearInstructionCache() override;
void PageTableChanged() override{};
+ void RecordBreak(GDBStub::BreakpointAddress bkpt);
private:
uc_engine* uc{};
+ GDBStub::BreakpointAddress last_bkpt{};
+ bool last_bkpt_hit;
};
diff --git a/src/core/file_sys/partition_filesystem.cpp b/src/core/file_sys/partition_filesystem.cpp
index 808254ecc..874b9e23b 100644
--- a/src/core/file_sys/partition_filesystem.cpp
+++ b/src/core/file_sys/partition_filesystem.cpp
@@ -19,13 +19,20 @@ Loader::ResultStatus PartitionFilesystem::Load(const std::string& file_path, siz
if (file.GetSize() < sizeof(Header))
return Loader::ResultStatus::Error;
+ file.Seek(offset, SEEK_SET);
// For cartridges, HFSs can get very large, so we need to calculate the size up to
// the actual content itself instead of just blindly reading in the entire file.
Header pfs_header;
if (!file.ReadBytes(&pfs_header, sizeof(Header)))
return Loader::ResultStatus::Error;
- bool is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0);
+ if (pfs_header.magic != Common::MakeMagic('H', 'F', 'S', '0') &&
+ pfs_header.magic != Common::MakeMagic('P', 'F', 'S', '0')) {
+ return Loader::ResultStatus::ErrorInvalidFormat;
+ }
+
+ bool is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
+
size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
size_t metadata_size =
sizeof(Header) + (pfs_header.num_entries * entry_size) + pfs_header.strtab_size;
@@ -50,7 +57,12 @@ Loader::ResultStatus PartitionFilesystem::Load(const std::vector<u8>& file_data,
return Loader::ResultStatus::Error;
memcpy(&pfs_header, &file_data[offset], sizeof(Header));
- is_hfs = (memcmp(pfs_header.magic.data(), "HFS", 3) == 0);
+ if (pfs_header.magic != Common::MakeMagic('H', 'F', 'S', '0') &&
+ pfs_header.magic != Common::MakeMagic('P', 'F', 'S', '0')) {
+ return Loader::ResultStatus::ErrorInvalidFormat;
+ }
+
+ is_hfs = pfs_header.magic == Common::MakeMagic('H', 'F', 'S', '0');
size_t entries_offset = offset + sizeof(Header);
size_t entry_size = is_hfs ? sizeof(HFSEntry) : sizeof(PFSEntry);
@@ -73,21 +85,21 @@ u32 PartitionFilesystem::GetNumEntries() const {
return pfs_header.num_entries;
}
-u64 PartitionFilesystem::GetEntryOffset(int index) const {
+u64 PartitionFilesystem::GetEntryOffset(u32 index) const {
if (index > GetNumEntries())
return 0;
return content_offset + pfs_entries[index].fs_entry.offset;
}
-u64 PartitionFilesystem::GetEntrySize(int index) const {
+u64 PartitionFilesystem::GetEntrySize(u32 index) const {
if (index > GetNumEntries())
return 0;
return pfs_entries[index].fs_entry.size;
}
-std::string PartitionFilesystem::GetEntryName(int index) const {
+std::string PartitionFilesystem::GetEntryName(u32 index) const {
if (index > GetNumEntries())
return "";
@@ -113,7 +125,7 @@ u64 PartitionFilesystem::GetFileSize(const std::string& name) const {
}
void PartitionFilesystem::Print() const {
- NGLOG_DEBUG(Service_FS, "Magic: {:.4}", pfs_header.magic.data());
+ NGLOG_DEBUG(Service_FS, "Magic: {}", pfs_header.magic);
NGLOG_DEBUG(Service_FS, "Files: {}", pfs_header.num_entries);
for (u32 i = 0; i < pfs_header.num_entries; i++) {
NGLOG_DEBUG(Service_FS, " > File {}: {} (0x{:X} bytes, at 0x{:X})", i,
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index 573c90057..9c5810cf1 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -27,9 +27,9 @@ public:
Loader::ResultStatus Load(const std::vector<u8>& file_data, size_t offset = 0);
u32 GetNumEntries() const;
- u64 GetEntryOffset(int index) const;
- u64 GetEntrySize(int index) const;
- std::string GetEntryName(int index) const;
+ u64 GetEntryOffset(u32 index) const;
+ u64 GetEntrySize(u32 index) const;
+ std::string GetEntryName(u32 index) const;
u64 GetFileOffset(const std::string& name) const;
u64 GetFileSize(const std::string& name) const;
@@ -37,7 +37,7 @@ public:
private:
struct Header {
- std::array<char, 4> magic;
+ u32_le magic;
u32_le num_entries;
u32_le strtab_size;
INSERT_PADDING_BYTES(0x4);
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 6c5a40ba8..2603192fe 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -32,9 +32,13 @@
#include "common/logging/log.h"
#include "common/string_util.h"
+#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
+#include "core/core_cpu.h"
#include "core/gdbstub/gdbstub.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
#include "core/loader/loader.h"
#include "core/memory.h"
@@ -137,15 +141,17 @@ static u8 command_buffer[GDB_BUFFER_SIZE];
static u32 command_length;
static u32 latest_signal = 0;
-static bool step_break = false;
static bool memory_break = false;
+static Kernel::Thread* current_thread = nullptr;
+
// Binding to a port within the reserved ports range (0-1023) requires root permissions,
// so default to a port outside of that range.
static u16 gdbstub_port = 24689;
static bool halt_loop = true;
static bool step_loop = false;
+static bool send_trap = false;
// If set to false, the server will never be started and no
// gdbstub-related functions will be executed.
@@ -165,6 +171,53 @@ static std::map<u64, Breakpoint> breakpoints_execute;
static std::map<u64, Breakpoint> breakpoints_read;
static std::map<u64, Breakpoint> breakpoints_write;
+static Kernel::Thread* FindThreadById(int id) {
+ for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
+ auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
+ for (auto thread : threads) {
+ if (thread->GetThreadId() == id) {
+ current_thread = thread.get();
+ return current_thread;
+ }
+ }
+ }
+ return nullptr;
+}
+
+static u64 RegRead(int id, Kernel::Thread* thread = nullptr) {
+ if (!thread) {
+ return 0;
+ }
+
+ if (id < SP_REGISTER) {
+ return thread->context.cpu_registers[id];
+ } else if (id == SP_REGISTER) {
+ return thread->context.sp;
+ } else if (id == PC_REGISTER) {
+ return thread->context.pc;
+ } else if (id == CPSR_REGISTER) {
+ return thread->context.cpsr;
+ } else {
+ return 0;
+ }
+}
+
+static void RegWrite(int id, u64 val, Kernel::Thread* thread = nullptr) {
+ if (!thread) {
+ return;
+ }
+
+ if (id < SP_REGISTER) {
+ thread->context.cpu_registers[id] = val;
+ } else if (id == SP_REGISTER) {
+ thread->context.sp = val;
+ } else if (id == PC_REGISTER) {
+ thread->context.pc = val;
+ } else if (id == CPSR_REGISTER) {
+ thread->context.cpsr = val;
+ }
+}
+
/**
* Turns hex string character into the equivalent byte.
*
@@ -193,7 +246,7 @@ static u8 NibbleToHex(u8 n) {
if (n < 0xA) {
return '0' + n;
} else {
- return 'A' + n - 0xA;
+ return 'a' + n - 0xA;
}
}
@@ -439,6 +492,8 @@ static void SendReply(const char* reply) {
return;
}
+ NGLOG_DEBUG(Debug_GDBStub, "Reply: {}", reply);
+
memset(command_buffer, 0, sizeof(command_buffer));
command_length = static_cast<u32>(strlen(reply));
@@ -483,6 +538,22 @@ static void HandleQuery() {
} else if (strncmp(query, "Xfer:features:read:target.xml:",
strlen("Xfer:features:read:target.xml:")) == 0) {
SendReply(target_xml);
+ } else if (strncmp(query, "Offsets", strlen("Offsets")) == 0) {
+ std::string buffer = fmt::format("TextSeg={:0x}", Memory::PROCESS_IMAGE_VADDR);
+ SendReply(buffer.c_str());
+ } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) {
+ std::string val = "m";
+ for (int core = 0; core < Core::NUM_CPU_CORES; core++) {
+ auto threads = Core::System::GetInstance().Scheduler(core)->GetThreadList();
+ for (auto thread : threads) {
+ val += fmt::format("{:x}", thread->GetThreadId());
+ val += ",";
+ }
+ }
+ val.pop_back();
+ SendReply(val.c_str());
+ } else if (strncmp(query, "sThreadInfo", strlen("sThreadInfo")) == 0) {
+ SendReply("l");
} else {
SendReply("");
}
@@ -490,11 +561,40 @@ static void HandleQuery() {
/// Handle set thread command from gdb client.
static void HandleSetThread() {
- if (memcmp(command_buffer, "Hg0", 3) == 0 || memcmp(command_buffer, "Hc-1", 4) == 0 ||
- memcmp(command_buffer, "Hc0", 4) == 0 || memcmp(command_buffer, "Hc1", 4) == 0) {
- return SendReply("OK");
+ if (memcmp(command_buffer, "Hc", 2) == 0 || memcmp(command_buffer, "Hg", 2) == 0) {
+ int thread_id = -1;
+ if (command_buffer[2] != '-') {
+ thread_id = static_cast<int>(HexToInt(
+ command_buffer + 2,
+ command_length - 2 /*strlen(reinterpret_cast<char*>(command_buffer) + 2)*/));
+ }
+ if (thread_id >= 1) {
+ current_thread = FindThreadById(thread_id);
+ }
+ if (!current_thread) {
+ thread_id = 1;
+ current_thread = FindThreadById(thread_id);
+ }
+ if (current_thread) {
+ SendReply("OK");
+ return;
+ }
}
+ SendReply("E01");
+}
+/// Handle thread alive command from gdb client.
+static void HandleThreadAlive() {
+ int thread_id = static_cast<int>(
+ HexToInt(command_buffer + 1,
+ command_length - 1 /*strlen(reinterpret_cast<char*>(command_buffer) + 1)*/));
+ if (thread_id == 0) {
+ thread_id = 1;
+ }
+ if (FindThreadById(thread_id)) {
+ SendReply("OK");
+ return;
+ }
SendReply("E01");
}
@@ -503,15 +603,24 @@ static void HandleSetThread() {
*
* @param signal Signal to be sent to client.
*/
-static void SendSignal(u32 signal) {
+static void SendSignal(Kernel::Thread* thread, u32 signal, bool full = true) {
if (gdbserver_socket == -1) {
return;
}
latest_signal = signal;
- std::string buffer = fmt::format("T{:02x}", latest_signal);
- NGLOG_DEBUG(Debug_GDBStub, "Response: {}", buffer);
+ std::string buffer;
+ if (full) {
+ buffer = fmt::format("T{:02x}{:02x}:{:016x};{:02x}:{:016x};", latest_signal, PC_REGISTER,
+ Common::swap64(RegRead(PC_REGISTER, thread)), SP_REGISTER,
+ Common::swap64(RegRead(SP_REGISTER, thread)));
+ } else {
+ buffer = fmt::format("T{:02x};", latest_signal);
+ }
+
+ buffer += fmt::format("thread:{:x};", thread->GetThreadId());
+
SendReply(buffer.c_str());
}
@@ -527,7 +636,7 @@ static void ReadCommand() {
} else if (c == 0x03) {
NGLOG_INFO(Debug_GDBStub, "gdb: found break command");
halt_loop = true;
- SendSignal(SIGTRAP);
+ SendSignal(current_thread, SIGTRAP);
return;
} else if (c != GDB_STUB_START) {
NGLOG_DEBUG(Debug_GDBStub, "gdb: read invalid byte {:02X}", c);
@@ -598,11 +707,11 @@ static void ReadRegister() {
}
if (id <= SP_REGISTER) {
- LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id)));
+ LongToGdbHex(reply, RegRead(id, current_thread));
} else if (id == PC_REGISTER) {
- LongToGdbHex(reply, Core::CurrentArmInterface().GetPC());
+ LongToGdbHex(reply, RegRead(id, current_thread));
} else if (id == CPSR_REGISTER) {
- IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR());
+ IntToGdbHex(reply, (u32)RegRead(id, current_thread));
} else {
return SendReply("E01");
}
@@ -618,16 +727,16 @@ static void ReadRegisters() {
u8* bufptr = buffer;
for (int reg = 0; reg <= SP_REGISTER; reg++) {
- LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg));
+ LongToGdbHex(bufptr + reg * 16, RegRead(reg, current_thread));
}
bufptr += (32 * 16);
- LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC());
+ LongToGdbHex(bufptr, RegRead(PC_REGISTER, current_thread));
bufptr += 16;
- IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR());
+ IntToGdbHex(bufptr, (u32)RegRead(CPSR_REGISTER, current_thread));
bufptr += 8;
@@ -646,11 +755,11 @@ static void WriteRegister() {
}
if (id <= SP_REGISTER) {
- Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr));
+ RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
} else if (id == PC_REGISTER) {
- Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr));
+ RegWrite(id, GdbHexToLong(buffer_ptr), current_thread);
} else if (id == CPSR_REGISTER) {
- Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr));
+ RegWrite(id, GdbHexToInt(buffer_ptr), current_thread);
} else {
return SendReply("E01");
}
@@ -667,11 +776,11 @@ static void WriteRegisters() {
for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
if (reg <= SP_REGISTER) {
- Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
+ RegWrite(reg, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == PC_REGISTER) {
- Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16));
+ RegWrite(PC_REGISTER, GdbHexToLong(buffer_ptr + i * 16), current_thread);
} else if (reg == CPSR_REGISTER) {
- Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
+ RegWrite(CPSR_REGISTER, GdbHexToInt(buffer_ptr + i * 16), current_thread);
} else {
UNIMPLEMENTED();
}
@@ -734,7 +843,7 @@ static void WriteMemory() {
void Break(bool is_memory_break) {
if (!halt_loop) {
halt_loop = true;
- SendSignal(SIGTRAP);
+ send_trap = true;
}
memory_break = is_memory_break;
@@ -744,10 +853,10 @@ void Break(bool is_memory_break) {
static void Step() {
step_loop = true;
halt_loop = true;
- step_break = true;
- SendSignal(SIGTRAP);
+ send_trap = true;
}
+/// Tell the CPU if we hit a memory breakpoint.
bool IsMemoryBreak() {
if (IsConnected()) {
return false;
@@ -759,7 +868,6 @@ bool IsMemoryBreak() {
/// Tell the CPU to continue executing.
static void Continue() {
memory_break = false;
- step_break = false;
step_loop = false;
halt_loop = false;
}
@@ -898,7 +1006,7 @@ void HandlePacket() {
HandleSetThread();
break;
case '?':
- SendSignal(latest_signal);
+ SendSignal(current_thread, latest_signal);
break;
case 'k':
Shutdown();
@@ -935,6 +1043,9 @@ void HandlePacket() {
case 'Z':
AddBreakpoint();
break;
+ case 'T':
+ HandleThreadAlive();
+ break;
default:
SendReply("");
break;
@@ -1079,4 +1190,11 @@ bool GetCpuStepFlag() {
void SetCpuStepFlag(bool is_step) {
step_loop = is_step;
}
+
+void SendTrap(Kernel::Thread* thread, int trap) {
+ if (send_trap) {
+ send_trap = false;
+ SendSignal(thread, trap);
+ }
+}
}; // namespace GDBStub
diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h
index 201fca095..f2418c9e4 100644
--- a/src/core/gdbstub/gdbstub.h
+++ b/src/core/gdbstub/gdbstub.h
@@ -7,6 +7,7 @@
#pragma once
#include "common/common_types.h"
+#include "core/hle/kernel/thread.h"
namespace GDBStub {
@@ -91,4 +92,12 @@ bool GetCpuStepFlag();
* @param is_step
*/
void SetCpuStepFlag(bool is_step);
+
+/**
+ * Send trap signal from thread back to the gdbstub server.
+ *
+ * @param thread Sending thread.
+ * @param trap Trap no.
+ */
+void SendTrap(Kernel::Thread* thread, int trap);
} // namespace GDBStub
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp
new file mode 100644
index 000000000..e9c8369d7
--- /dev/null
+++ b/src/core/hle/kernel/address_arbiter.cpp
@@ -0,0 +1,173 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "core/hle/kernel/errors.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/thread.h"
+#include "core/hle/lock.h"
+#include "core/memory.h"
+
+namespace Kernel {
+namespace AddressArbiter {
+
+// Performs actual address waiting logic.
+static ResultCode WaitForAddress(VAddr address, s64 timeout) {
+ SharedPtr<Thread> current_thread = GetCurrentThread();
+ current_thread->arb_wait_address = address;
+ current_thread->status = THREADSTATUS_WAIT_ARB;
+ current_thread->wakeup_callback = nullptr;
+
+ current_thread->WakeAfterDelay(timeout);
+
+ Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
+ return RESULT_TIMEOUT;
+}
+
+// Gets the threads waiting on an address.
+static void GetThreadsWaitingOnAddress(std::vector<SharedPtr<Thread>>& waiting_threads,
+ VAddr address) {
+ auto RetrieveWaitingThreads =
+ [](size_t core_index, std::vector<SharedPtr<Thread>>& waiting_threads, VAddr arb_addr) {
+ const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+ auto& thread_list = scheduler->GetThreadList();
+
+ for (auto& thread : thread_list) {
+ if (thread->arb_wait_address == arb_addr)
+ waiting_threads.push_back(thread);
+ }
+ };
+
+ // Retrieve a list of all threads that are waiting for this address.
+ RetrieveWaitingThreads(0, waiting_threads, address);
+ RetrieveWaitingThreads(1, waiting_threads, address);
+ RetrieveWaitingThreads(2, waiting_threads, address);
+ RetrieveWaitingThreads(3, waiting_threads, address);
+ // Sort them by priority, such that the highest priority ones come first.
+ std::sort(waiting_threads.begin(), waiting_threads.end(),
+ [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
+ return lhs->current_priority < rhs->current_priority;
+ });
+}
+
+// Wake up num_to_wake (or all) threads in a vector.
+static void WakeThreads(std::vector<SharedPtr<Thread>>& waiting_threads, s32 num_to_wake) {
+ // Only process up to 'target' threads, unless 'target' is <= 0, in which case process
+ // them all.
+ size_t last = waiting_threads.size();
+ if (num_to_wake > 0)
+ last = num_to_wake;
+
+ // Signal the waiting threads.
+ for (size_t i = 0; i < last; i++) {
+ ASSERT(waiting_threads[i]->status = THREADSTATUS_WAIT_ARB);
+ waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS);
+ waiting_threads[i]->arb_wait_address = 0;
+ waiting_threads[i]->ResumeFromWait();
+ }
+}
+
+// Signals an address being waited on.
+ResultCode SignalToAddress(VAddr address, s32 num_to_wake) {
+ // Get threads waiting on the address.
+ std::vector<SharedPtr<Thread>> waiting_threads;
+ GetThreadsWaitingOnAddress(waiting_threads, address);
+
+ WakeThreads(waiting_threads, num_to_wake);
+ return RESULT_SUCCESS;
+}
+
+// Signals an address being waited on and increments its value if equal to the value argument.
+ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake) {
+ // Ensure that we can write to the address.
+ if (!Memory::IsValidVirtualAddress(address)) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+
+ if (static_cast<s32>(Memory::Read32(address)) == value) {
+ Memory::Write32(address, static_cast<u32>(value + 1));
+ } else {
+ return ERR_INVALID_STATE;
+ }
+
+ return SignalToAddress(address, num_to_wake);
+}
+
+// Signals an address being waited on and modifies its value based on waiting thread count if equal
+// to the value argument.
+ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value,
+ s32 num_to_wake) {
+ // Ensure that we can write to the address.
+ if (!Memory::IsValidVirtualAddress(address)) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+
+ // Get threads waiting on the address.
+ std::vector<SharedPtr<Thread>> waiting_threads;
+ GetThreadsWaitingOnAddress(waiting_threads, address);
+
+ // Determine the modified value depending on the waiting count.
+ s32 updated_value;
+ if (waiting_threads.size() == 0) {
+ updated_value = value - 1;
+ } else if (num_to_wake <= 0 || waiting_threads.size() <= num_to_wake) {
+ updated_value = value + 1;
+ } else {
+ updated_value = value;
+ }
+
+ if (static_cast<s32>(Memory::Read32(address)) == value) {
+ Memory::Write32(address, static_cast<u32>(updated_value));
+ } else {
+ return ERR_INVALID_STATE;
+ }
+
+ WakeThreads(waiting_threads, num_to_wake);
+ return RESULT_SUCCESS;
+}
+
+// Waits on an address if the value passed is less than the argument value, optionally decrementing.
+ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement) {
+ // Ensure that we can read the address.
+ if (!Memory::IsValidVirtualAddress(address)) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+
+ s32 cur_value = static_cast<s32>(Memory::Read32(address));
+ if (cur_value < value) {
+ Memory::Write32(address, static_cast<u32>(cur_value - 1));
+ } else {
+ return ERR_INVALID_STATE;
+ }
+ // Short-circuit without rescheduling, if timeout is zero.
+ if (timeout == 0) {
+ return RESULT_TIMEOUT;
+ }
+
+ return WaitForAddress(address, timeout);
+}
+
+// Waits on an address if the value passed is equal to the argument value.
+ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout) {
+ // Ensure that we can read the address.
+ if (!Memory::IsValidVirtualAddress(address)) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+ // Only wait for the address if equal.
+ if (static_cast<s32>(Memory::Read32(address)) != value) {
+ return ERR_INVALID_STATE;
+ }
+ // Short-circuit without rescheduling, if timeout is zero.
+ if (timeout == 0) {
+ return RESULT_TIMEOUT;
+ }
+
+ return WaitForAddress(address, timeout);
+}
+} // namespace AddressArbiter
+} // namespace Kernel
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h
new file mode 100644
index 000000000..f20f3dbc0
--- /dev/null
+++ b/src/core/hle/kernel/address_arbiter.h
@@ -0,0 +1,32 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/result.h"
+
+namespace Kernel {
+
+namespace AddressArbiter {
+enum class ArbitrationType {
+ WaitIfLessThan = 0,
+ DecrementAndWaitIfLessThan = 1,
+ WaitIfEqual = 2,
+};
+
+enum class SignalType {
+ Signal = 0,
+ IncrementAndSignalIfEqual = 1,
+ ModifyByWaitingCountAndSignalIfEqual = 2,
+};
+
+ResultCode SignalToAddress(VAddr address, s32 num_to_wake);
+ResultCode IncrementAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+ResultCode ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr address, s32 value, s32 num_to_wake);
+
+ResultCode WaitForAddressIfLessThan(VAddr address, s32 value, s64 timeout, bool should_decrement);
+ResultCode WaitForAddressIfEqual(VAddr address, s32 value, s64 timeout);
+} // namespace AddressArbiter
+
+} // namespace Kernel
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index e1b5430bf..221cb1bb5 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -20,13 +20,16 @@ enum {
MaxConnectionsReached = 52,
// Confirmed Switch OS error codes
- MisalignedAddress = 102,
+ InvalidAddress = 102,
+ InvalidMemoryState = 106,
InvalidProcessorId = 113,
InvalidHandle = 114,
InvalidCombination = 116,
Timeout = 117,
SynchronizationCanceled = 118,
TooLarge = 119,
+ InvalidEnumValue = 120,
+ InvalidState = 125,
};
}
@@ -39,14 +42,15 @@ constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE(-1);
constexpr ResultCode ERR_PORT_NAME_TOO_LONG(-1);
constexpr ResultCode ERR_WRONG_PERMISSION(-1);
constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED(-1);
-constexpr ResultCode ERR_INVALID_ENUM_VALUE(-1);
+constexpr ResultCode ERR_INVALID_ENUM_VALUE(ErrorModule::Kernel, ErrCodes::InvalidEnumValue);
constexpr ResultCode ERR_INVALID_ENUM_VALUE_FND(-1);
constexpr ResultCode ERR_INVALID_COMBINATION(-1);
constexpr ResultCode ERR_INVALID_COMBINATION_KERNEL(-1);
constexpr ResultCode ERR_OUT_OF_MEMORY(-1);
-constexpr ResultCode ERR_INVALID_ADDRESS(-1);
-constexpr ResultCode ERR_INVALID_ADDRESS_STATE(-1);
+constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidAddress);
+constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
+constexpr ResultCode ERR_INVALID_STATE(ErrorModule::Kernel, ErrCodes::InvalidState);
constexpr ResultCode ERR_INVALID_POINTER(-1);
constexpr ResultCode ERR_INVALID_OBJECT_ADDR(-1);
constexpr ResultCode ERR_NOT_AUTHORIZED(-1);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 01904467e..b0d83f401 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -271,6 +271,11 @@ std::vector<u8> HLERequestContext::ReadBuffer(int buffer_index) const {
}
size_t HLERequestContext::WriteBuffer(const void* buffer, size_t size, int buffer_index) const {
+ if (size == 0) {
+ NGLOG_WARNING(Core, "skip empty buffer write");
+ return 0;
+ }
+
const bool is_buffer_b{BufferDescriptorB().size() && BufferDescriptorB()[buffer_index].Size()};
const size_t buffer_size{GetWriteBufferSize(buffer_index)};
if (size > buffer_size) {
diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp
index bc144f3de..65560226d 100644
--- a/src/core/hle/kernel/mutex.cpp
+++ b/src/core/hle/kernel/mutex.cpp
@@ -59,7 +59,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
Handle requesting_thread_handle) {
// The mutex address must be 4-byte aligned
if ((address % sizeof(u32)) != 0) {
- return ResultCode(ErrorModule::Kernel, ErrCodes::MisalignedAddress);
+ return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
}
SharedPtr<Thread> holding_thread = g_handle_table.Get<Thread>(holding_thread_handle);
@@ -97,7 +97,7 @@ ResultCode Mutex::TryAcquire(VAddr address, Handle holding_thread_handle,
ResultCode Mutex::Release(VAddr address) {
// The mutex address must be 4-byte aligned
if ((address % sizeof(u32)) != 0) {
- return ResultCode(ErrorModule::Kernel, ErrCodes::MisalignedAddress);
+ return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidAddress);
}
auto [thread, num_waiters] = GetHighestPriorityMutexWaitingThread(GetCurrentThread(), address);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index ec3601e8b..1a36e0d02 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -11,6 +11,7 @@
#include "common/string_util.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/client_port.h"
#include "core/hle/kernel/client_session.h"
#include "core/hle/kernel/event.h"
@@ -316,6 +317,11 @@ static ResultCode GetInfo(u64* result, u64 info_id, u64 handle, u64 info_sub_id)
"(STUBBED) Attempted to query privileged process id bounds, returned 0");
*result = 0;
break;
+ case GetInfoType::UserExceptionContextAddr:
+ NGLOG_WARNING(Kernel_SVC,
+ "(STUBBED) Attempted to query user exception context address, returned 0");
+ *result = 0;
+ break;
default:
UNIMPLEMENTED();
}
@@ -575,7 +581,7 @@ static void SleepThread(s64 nanoseconds) {
Core::System::GetInstance().PrepareReschedule();
}
-/// Signal process wide key atomic
+/// Wait process wide key atomic
static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_variable_addr,
Handle thread_handle, s64 nano_seconds) {
NGLOG_TRACE(
@@ -684,6 +690,58 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
return RESULT_SUCCESS;
}
+// Wait for an address (via Address Arbiter)
+static ResultCode WaitForAddress(VAddr address, u32 type, s32 value, s64 timeout) {
+ NGLOG_WARNING(Kernel_SVC, "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, timeout={}",
+ address, type, value, timeout);
+ // If the passed address is a kernel virtual address, return invalid memory state.
+ if (Memory::IsKernelVirtualAddress(address)) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+ // If the address is not properly aligned to 4 bytes, return invalid address.
+ if (address % sizeof(u32) != 0) {
+ return ERR_INVALID_ADDRESS;
+ }
+
+ switch (static_cast<AddressArbiter::ArbitrationType>(type)) {
+ case AddressArbiter::ArbitrationType::WaitIfLessThan:
+ return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, false);
+ case AddressArbiter::ArbitrationType::DecrementAndWaitIfLessThan:
+ return AddressArbiter::WaitForAddressIfLessThan(address, value, timeout, true);
+ case AddressArbiter::ArbitrationType::WaitIfEqual:
+ return AddressArbiter::WaitForAddressIfEqual(address, value, timeout);
+ default:
+ return ERR_INVALID_ENUM_VALUE;
+ }
+}
+
+// Signals to an address (via Address Arbiter)
+static ResultCode SignalToAddress(VAddr address, u32 type, s32 value, s32 num_to_wake) {
+ NGLOG_WARNING(Kernel_SVC,
+ "called, address=0x{:X}, type=0x{:X}, value=0x{:X}, num_to_wake=0x{:X}", address,
+ type, value, num_to_wake);
+ // If the passed address is a kernel virtual address, return invalid memory state.
+ if (Memory::IsKernelVirtualAddress(address)) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+ // If the address is not properly aligned to 4 bytes, return invalid address.
+ if (address % sizeof(u32) != 0) {
+ return ERR_INVALID_ADDRESS;
+ }
+
+ switch (static_cast<AddressArbiter::SignalType>(type)) {
+ case AddressArbiter::SignalType::Signal:
+ return AddressArbiter::SignalToAddress(address, num_to_wake);
+ case AddressArbiter::SignalType::IncrementAndSignalIfEqual:
+ return AddressArbiter::IncrementAndSignalToAddressIfEqual(address, value, num_to_wake);
+ case AddressArbiter::SignalType::ModifyByWaitingCountAndSignalIfEqual:
+ return AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(address, value,
+ num_to_wake);
+ default:
+ return ERR_INVALID_ENUM_VALUE;
+ }
+}
+
/// This returns the total CPU ticks elapsed since the CPU was powered-on
static u64 GetSystemTick() {
const u64 result{CoreTiming::GetTicks()};
@@ -744,7 +802,7 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
ASSERT(thread->owner_process->ideal_processor != THREADPROCESSORID_DEFAULT);
// Set the target CPU to the one specified in the process' exheader.
core = thread->owner_process->ideal_processor;
- mask = 1 << core;
+ mask = 1ull << core;
}
if (mask == 0) {
@@ -761,7 +819,7 @@ static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
}
// Error out if the input core isn't enabled in the input mask.
- if (core < Core::NUM_CPU_CORES && (mask & (1 << core)) == 0) {
+ if (core < Core::NUM_CPU_CORES && (mask & (1ull << core)) == 0) {
return ResultCode(ErrorModule::Kernel, ErrCodes::InvalidCombination);
}
@@ -856,8 +914,8 @@ static const FunctionDef SVC_Table[] = {
{0x31, nullptr, "GetResourceLimitCurrentValue"},
{0x32, SvcWrap<SetThreadActivity>, "SetThreadActivity"},
{0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
- {0x34, nullptr, "WaitForAddress"},
- {0x35, nullptr, "SignalToAddress"},
+ {0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
+ {0x35, SvcWrap<SignalToAddress>, "SignalToAddress"},
{0x36, nullptr, "Unknown"},
{0x37, nullptr, "Unknown"},
{0x38, nullptr, "Unknown"},
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index 40aa88cc1..79c3fe31b 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -179,6 +179,20 @@ void SvcWrap() {
FuncReturn(retval);
}
+template <ResultCode func(u64, u32, s32, s64)>
+void SvcWrap() {
+ FuncReturn(
+ func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3))
+ .raw);
+}
+
+template <ResultCode func(u64, u32, s32, s32)>
+void SvcWrap() {
+ FuncReturn(func(PARAM(0), (u32)(PARAM(1) & 0xFFFFFFFF), (s32)(PARAM(2) & 0xFFFFFFFF),
+ (s32)(PARAM(3) & 0xFFFFFFFF))
+ .raw);
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// Function wrappers that return type u32
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index cffa7ca83..2f333ec34 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -140,6 +140,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
}
}
+ if (thread->arb_wait_address != 0) {
+ ASSERT(thread->status == THREADSTATUS_WAIT_ARB);
+ thread->arb_wait_address = 0;
+ }
+
if (resume)
thread->ResumeFromWait();
}
@@ -179,6 +184,7 @@ void Thread::ResumeFromWait() {
case THREADSTATUS_WAIT_SLEEP:
case THREADSTATUS_WAIT_IPC:
case THREADSTATUS_WAIT_MUTEX:
+ case THREADSTATUS_WAIT_ARB:
break;
case THREADSTATUS_READY:
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 1d2da6d50..f1e759802 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -45,6 +45,7 @@ enum ThreadStatus {
THREADSTATUS_WAIT_SYNCH_ANY, ///< Waiting due to WaitSynch1 or WaitSynchN with wait_all = false
THREADSTATUS_WAIT_SYNCH_ALL, ///< Waiting due to WaitSynchronizationN with wait_all = true
THREADSTATUS_WAIT_MUTEX, ///< Waiting due to an ArbitrateLock/WaitProcessWideKey svc
+ THREADSTATUS_WAIT_ARB, ///< Waiting due to a SignalToAddress/WaitForAddress svc
THREADSTATUS_DORMANT, ///< Created but not yet made ready
THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated
};
@@ -230,6 +231,9 @@ public:
VAddr mutex_wait_address; ///< If waiting on a Mutex, this is the mutex address
Handle wait_handle; ///< The handle used to wait for the mutex.
+ // If waiting for an AddressArbiter, this is the address being waited on.
+ VAddr arb_wait_address{0};
+
std::string name;
/// Handle used by guest emulated application to access this thread
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 12954556d..b8d6b8d4d 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -561,7 +561,7 @@ IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationF
{32, nullptr, "BeginBlockingHomeButton"},
{33, nullptr, "EndBlockingHomeButton"},
{40, &IApplicationFunctions::NotifyRunning, "NotifyRunning"},
- {50, nullptr, "GetPseudoDeviceId"},
+ {50, &IApplicationFunctions::GetPseudoDeviceId, "GetPseudoDeviceId"},
{60, nullptr, "SetMediaPlaybackStateForApplication"},
{65, nullptr, "IsGamePlayRecordingSupported"},
{66, &IApplicationFunctions::InitializeGamePlayRecording, "InitializeGamePlayRecording"},
@@ -684,6 +684,17 @@ void IApplicationFunctions::NotifyRunning(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_AM, "(STUBBED) called");
}
+void IApplicationFunctions::GetPseudoDeviceId(Kernel::HLERequestContext& ctx) {
+ IPC::ResponseBuilder rb{ctx, 6};
+ rb.Push(RESULT_SUCCESS);
+
+ // Returns a 128-bit UUID
+ rb.Push<u64>(0);
+ rb.Push<u64>(0);
+
+ NGLOG_WARNING(Service_AM, "(STUBBED) called");
+}
+
void InstallInterfaces(SM::ServiceManager& service_manager,
std::shared_ptr<NVFlinger::NVFlinger> nvflinger) {
std::make_shared<AppletAE>(nvflinger)->InstallAsService(service_manager);
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index 301a6c798..1da79fd01 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -138,6 +138,7 @@ private:
void InitializeGamePlayRecording(Kernel::HLERequestContext& ctx);
void SetGamePlayRecordingState(Kernel::HLERequestContext& ctx);
void NotifyRunning(Kernel::HLERequestContext& ctx);
+ void GetPseudoDeviceId(Kernel::HLERequestContext& ctx);
};
class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> {
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 6e8002bc9..44b7ef216 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -57,27 +57,26 @@ private:
}
void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) {
- NGLOG_DEBUG(Service_Audio, "{}", ctx.Description());
- AudioRendererResponseData response_data{};
-
- response_data.section_0_size =
- static_cast<u32>(response_data.state_entries.size() * sizeof(AudioRendererStateEntry));
- response_data.section_1_size = static_cast<u32>(response_data.section_1.size());
- response_data.section_2_size = static_cast<u32>(response_data.section_2.size());
- response_data.section_3_size = static_cast<u32>(response_data.section_3.size());
- response_data.section_4_size = static_cast<u32>(response_data.section_4.size());
- response_data.section_5_size = static_cast<u32>(response_data.section_5.size());
- response_data.total_size = sizeof(AudioRendererResponseData);
-
- for (unsigned i = 0; i < response_data.state_entries.size(); i++) {
- // 4 = Busy and 5 = Ready?
- response_data.state_entries[i].state = 5;
+ AudioRendererConfig config;
+ auto buf = ctx.ReadBuffer();
+ std::memcpy(&config, buf.data(), sizeof(AudioRendererConfig));
+
+ AudioRendererResponse response_data{config};
+
+ ASSERT(ctx.GetWriteBufferSize() == response_data.total_size);
+
+ std::vector<u8> output(response_data.total_size);
+ std::memcpy(output.data(), &response_data, sizeof(AudioRendererResponse));
+ std::vector<MemoryPoolEntry> memory_pool(config.memory_pools_size / 0x20);
+ for (auto& entry : memory_pool) {
+ entry.state = 5;
}
+ std::memcpy(output.data() + sizeof(AudioRendererResponse), memory_pool.data(),
+ response_data.memory_pools_size);
- ctx.WriteBuffer(&response_data, response_data.total_size);
+ ctx.WriteBuffer(output);
IPC::ResponseBuilder rb{ctx, 2};
-
rb.Push(RESULT_SUCCESS);
NGLOG_WARNING(Service_Audio, "(STUBBED) called");
@@ -109,43 +108,55 @@ private:
NGLOG_WARNING(Service_Audio, "(STUBBED) called");
}
- struct AudioRendererStateEntry {
+ struct MemoryPoolEntry {
u32_le state;
u32_le unknown_4;
u32_le unknown_8;
u32_le unknown_c;
};
- static_assert(sizeof(AudioRendererStateEntry) == 0x10,
- "AudioRendererStateEntry has wrong size");
-
- struct AudioRendererResponseData {
- u32_le unknown_0;
- u32_le section_5_size;
- u32_le section_0_size;
- u32_le section_1_size;
+ static_assert(sizeof(MemoryPoolEntry) == 0x10, "MemoryPoolEntry has wrong size");
+
+ struct AudioRendererConfig {
+ u32 revision;
+ u32 behavior_size;
+ u32 memory_pools_size;
+ u32 voices_size;
+ u32 voice_resource_size;
+ u32 effects_size;
+ u32 mixes_size;
+ u32 sinks_size;
+ u32 performance_buffer_size;
+ INSERT_PADDING_WORDS(6);
+ u32 total_size;
+ };
+ static_assert(sizeof(AudioRendererConfig) == 0x40, "AudioRendererConfig has wrong size");
+
+ struct AudioRendererResponse {
+ AudioRendererResponse(const AudioRendererConfig& config) {
+ revision = config.revision;
+ error_info_size = 0xb0;
+ memory_pools_size = (config.memory_pools_size / 0x20) * 0x10;
+ voices_size = (config.voices_size / 0x170) * 0x10;
+ effects_size = (config.effects_size / 0xC0) * 0x10;
+ sinks_size = (config.sinks_size / 0x140) * 0x20;
+ performance_manager_size = 0x10;
+ total_size = sizeof(AudioRendererResponse) + error_info_size + memory_pools_size +
+ voices_size + effects_size + sinks_size + performance_manager_size;
+ }
+
+ u32_le revision;
+ u32_le error_info_size;
+ u32_le memory_pools_size;
+ u32_le voices_size;
u32_le unknown_10;
- u32_le section_2_size;
+ u32_le effects_size;
u32_le unknown_18;
- u32_le section_3_size;
- u32_le section_4_size;
- u32_le unknown_24;
- u32_le unknown_28;
- u32_le unknown_2c;
- u32_le unknown_30;
- u32_le unknown_34;
- u32_le unknown_38;
+ u32_le sinks_size;
+ u32_le performance_manager_size;
+ INSERT_PADDING_WORDS(6);
u32_le total_size;
-
- std::array<AudioRendererStateEntry, 0x18e> state_entries;
-
- std::array<u8, 0x600> section_1;
- std::array<u8, 0xe0> section_2;
- std::array<u8, 0x20> section_3;
- std::array<u8, 0x10> section_4;
- std::array<u8, 0xb0> section_5;
};
- static_assert(sizeof(AudioRendererResponseData) == 0x20e0,
- "AudioRendererResponseData has wrong size");
+ static_assert(sizeof(AudioRendererResponse) == 0x40, "AudioRendererResponse has wrong size");
/// This is used to trigger the audio event callback.
CoreTiming::EventType* audio_event;
@@ -258,7 +269,7 @@ void AudRenU::OpenAudioRenderer(Kernel::HLERequestContext& ctx) {
void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
- auto params = rp.PopRaw<WorkerBufferParameters>();
+ auto params = rp.PopRaw<AudioRendererParameters>();
u64 buffer_sz = Common::AlignUp(4 * params.unknown8, 0x40);
buffer_sz += params.unknownC * 1024;
@@ -328,7 +339,7 @@ bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {
case AudioFeatures::Splitter:
- return version_num >= 2;
+ return version_num >= 2u;
default:
return false;
}
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index fe53de4ce..7dbd9b74d 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -22,7 +22,7 @@ private:
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDevice(Kernel::HLERequestContext& ctx);
- struct WorkerBufferParameters {
+ struct AudioRendererParameters {
u32_le sample_rate;
u32_le sample_count;
u32_le unknown8;
@@ -38,8 +38,8 @@ private:
u8 padding2[4];
u32_le magic;
};
- static_assert(sizeof(WorkerBufferParameters) == 52,
- "WorkerBufferParameters is an invalid size");
+ static_assert(sizeof(AudioRendererParameters) == 52,
+ "AudioRendererParameters is an invalid size");
enum class AudioFeatures : u32 {
Splitter,
diff --git a/src/core/hle/service/filesystem/fsp_srv.cpp b/src/core/hle/service/filesystem/fsp_srv.cpp
index 8a47bb7af..1cf97e876 100644
--- a/src/core/hle/service/filesystem/fsp_srv.cpp
+++ b/src/core/hle/service/filesystem/fsp_srv.cpp
@@ -4,6 +4,7 @@
#include <cinttypes>
#include "common/logging/log.h"
+#include "common/string_util.h"
#include "core/core.h"
#include "core/file_sys/directory.h"
#include "core/file_sys/filesystem.h"
@@ -258,9 +259,7 @@ public:
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
- auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
- std::string name(file_buffer.begin(), end);
+ std::string name = Common::StringFromBuffer(file_buffer);
u64 mode = rp.Pop<u64>();
u32 size = rp.Pop<u32>();
@@ -275,9 +274,7 @@ public:
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
- auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
- std::string name(file_buffer.begin(), end);
+ std::string name = Common::StringFromBuffer(file_buffer);
NGLOG_DEBUG(Service_FS, "called file {}", name);
@@ -289,9 +286,7 @@ public:
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
- auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
- std::string name(file_buffer.begin(), end);
+ std::string name = Common::StringFromBuffer(file_buffer);
NGLOG_DEBUG(Service_FS, "called directory {}", name);
@@ -305,13 +300,11 @@ public:
std::vector<u8> buffer;
buffer.resize(ctx.BufferDescriptorX()[0].Size());
Memory::ReadBlock(ctx.BufferDescriptorX()[0].Address(), buffer.data(), buffer.size());
- auto end = std::find(buffer.begin(), buffer.end(), '\0');
- std::string src_name(buffer.begin(), end);
+ std::string src_name = Common::StringFromBuffer(buffer);
buffer.resize(ctx.BufferDescriptorX()[1].Size());
Memory::ReadBlock(ctx.BufferDescriptorX()[1].Address(), buffer.data(), buffer.size());
- end = std::find(buffer.begin(), buffer.end(), '\0');
- std::string dst_name(buffer.begin(), end);
+ std::string dst_name = Common::StringFromBuffer(buffer);
NGLOG_DEBUG(Service_FS, "called file '{}' to file '{}'", src_name, dst_name);
@@ -323,9 +316,7 @@ public:
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
- auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
- std::string name(file_buffer.begin(), end);
+ std::string name = Common::StringFromBuffer(file_buffer);
auto mode = static_cast<FileSys::Mode>(rp.Pop<u32>());
@@ -349,9 +340,7 @@ public:
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
- auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
- std::string name(file_buffer.begin(), end);
+ std::string name = Common::StringFromBuffer(file_buffer);
// TODO(Subv): Implement this filter.
u32 filter_flags = rp.Pop<u32>();
@@ -376,9 +365,7 @@ public:
IPC::RequestParser rp{ctx};
auto file_buffer = ctx.ReadBuffer();
- auto end = std::find(file_buffer.begin(), file_buffer.end(), '\0');
-
- std::string name(file_buffer.begin(), end);
+ std::string name = Common::StringFromBuffer(file_buffer);
NGLOG_DEBUG(Service_FS, "called file {}", name);
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 00c5308ba..2696a8bf0 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -84,6 +84,10 @@ private:
for (size_t controller = 0; controller < mem.controllers.size(); controller++) {
for (int index = 0; index < HID_NUM_LAYOUTS; index++) {
+ // TODO(DarkLordZach): Is this layout/controller config actually invalid?
+ if (controller == Controller_Handheld && index == Layout_Single)
+ continue;
+
ControllerLayout& layout = mem.controllers[controller].layouts[index];
layout.header.num_entries = HID_NUM_ENTRIES;
layout.header.max_entry_index = HID_NUM_ENTRIES - 1;
@@ -94,7 +98,6 @@ private:
layout.header.latest_entry = (layout.header.latest_entry + 1) % HID_NUM_ENTRIES;
ControllerInputEntry& entry = layout.entries[layout.header.latest_entry];
- entry.connection_state = ConnectionState_Connected | ConnectionState_Wired;
entry.timestamp++;
// TODO(shinyquagsire23): Is this always identical to timestamp?
entry.timestamp_2++;
@@ -103,6 +106,8 @@ private:
if (controller != Controller_Handheld)
continue;
+ entry.connection_state = ConnectionState_Connected | ConnectionState_Wired;
+
// TODO(shinyquagsire23): Set up some LUTs for each layout mapping in the future?
// For now everything is just the default handheld layout, but split Joy-Con will
// rotate the face buttons and directions for certain layouts.
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 15eee8f01..b499308d6 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -12,7 +12,7 @@ namespace Service::HID {
// Begin enums and output structs
constexpr u32 HID_NUM_ENTRIES = 17;
-constexpr u32 HID_NUM_LAYOUTS = 2;
+constexpr u32 HID_NUM_LAYOUTS = 7;
constexpr s32 HID_JOYSTICK_MAX = 0x8000;
constexpr s32 HID_JOYSTICK_MIN = -0x8000;
diff --git a/src/core/hle/service/mm/mm_u.cpp b/src/core/hle/service/mm/mm_u.cpp
new file mode 100644
index 000000000..b3a85b818
--- /dev/null
+++ b/src/core/hle/service/mm/mm_u.cpp
@@ -0,0 +1,50 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/client_session.h"
+#include "core/hle/service/mm/mm_u.h"
+
+namespace Service::MM {
+
+void InstallInterfaces(SM::ServiceManager& service_manager) {
+ std::make_shared<MM_U>()->InstallAsService(service_manager);
+}
+
+void MM_U::Initialize(Kernel::HLERequestContext& ctx) {
+ NGLOG_WARNING(Service_MM, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void MM_U::SetAndWait(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ min = rp.Pop<u32>();
+ max = rp.Pop<u32>();
+ current = min;
+
+ NGLOG_WARNING(Service_MM, "(STUBBED) called, min=0x{:X}, max=0x{:X}", min, max);
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+}
+
+void MM_U::Get(Kernel::HLERequestContext& ctx) {
+ NGLOG_WARNING(Service_MM, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push(current);
+}
+
+MM_U::MM_U() : ServiceFramework("mm:u") {
+ static const FunctionInfo functions[] = {
+ {0, nullptr, "InitializeOld"}, {1, nullptr, "FinalizeOld"},
+ {2, nullptr, "SetAndWaitOld"}, {3, nullptr, "GetOld"},
+ {4, &MM_U::Initialize, "Initialize"}, {5, nullptr, "Finalize"},
+ {6, &MM_U::SetAndWait, "SetAndWait"}, {7, &MM_U::Get, "Get"},
+ };
+ RegisterHandlers(functions);
+}
+
+} // namespace Service::MM
diff --git a/src/core/hle/service/mm/mm_u.h b/src/core/hle/service/mm/mm_u.h
new file mode 100644
index 000000000..79eeedf9c
--- /dev/null
+++ b/src/core/hle/service/mm/mm_u.h
@@ -0,0 +1,29 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "core/hle/service/service.h"
+
+namespace Service::MM {
+
+class MM_U final : public ServiceFramework<MM_U> {
+public:
+ MM_U();
+ ~MM_U() = default;
+
+private:
+ void Initialize(Kernel::HLERequestContext& ctx);
+ void SetAndWait(Kernel::HLERequestContext& ctx);
+ void Get(Kernel::HLERequestContext& ctx);
+
+ u32 min{0};
+ u32 max{0};
+ u32 current{0};
+};
+
+/// Registers all MM services with the specified service manager.
+void InstallInterfaces(SM::ServiceManager& service_manager);
+
+} // namespace Service::MM
diff --git a/src/core/hle/service/nfp/nfp.cpp b/src/core/hle/service/nfp/nfp.cpp
index 2af4465de..2a9f84037 100644
--- a/src/core/hle/service/nfp/nfp.cpp
+++ b/src/core/hle/service/nfp/nfp.cpp
@@ -4,6 +4,8 @@
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
+#include "core/hle/kernel/event.h"
+#include "core/hle/service/hid/hid.h"
#include "core/hle/service/nfp/nfp.h"
#include "core/hle/service/nfp/nfp_user.h"
@@ -18,7 +20,7 @@ public:
static const FunctionInfo functions[] = {
{0, &IUser::Initialize, "Initialize"},
{1, nullptr, "Finalize"},
- {2, nullptr, "ListDevices"},
+ {2, &IUser::ListDevices, "ListDevices"},
{3, nullptr, "StartDetection"},
{4, nullptr, "StopDetection"},
{5, nullptr, "Mount"},
@@ -33,24 +35,116 @@ public:
{14, nullptr, "GetRegisterInfo"},
{15, nullptr, "GetCommonInfo"},
{16, nullptr, "GetModelInfo"},
- {17, nullptr, "AttachActivateEvent"},
- {18, nullptr, "AttachDeactivateEvent"},
- {19, nullptr, "GetState"},
- {20, nullptr, "GetDeviceState"},
- {21, nullptr, "GetNpadId"},
+ {17, &IUser::AttachActivateEvent, "AttachActivateEvent"},
+ {18, &IUser::AttachDeactivateEvent, "AttachDeactivateEvent"},
+ {19, &IUser::GetState, "GetState"},
+ {20, &IUser::GetDeviceState, "GetDeviceState"},
+ {21, &IUser::GetNpadId, "GetNpadId"},
{22, nullptr, "GetApplicationArea2"},
- {23, nullptr, "AttachAvailabilityChangeEvent"},
+ {23, &IUser::AttachAvailabilityChangeEvent, "AttachAvailabilityChangeEvent"},
{24, nullptr, "RecreateApplicationArea"},
};
RegisterHandlers(functions);
+
+ activate_event = Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:ActivateEvent");
+ deactivate_event =
+ Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:DeactivateEvent");
+ availability_change_event =
+ Kernel::Event::Create(Kernel::ResetType::OneShot, "IUser:AvailabilityChangeEvent");
}
private:
+ enum class State : u32 {
+ NonInitialized = 0,
+ Initialized = 1,
+ };
+
+ enum class DeviceState : u32 {
+ Initialized = 0,
+ };
+
void Initialize(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_NFP, "(STUBBED) called");
+
+ state = State::Initialized;
+
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
+
+ void ListDevices(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const u32 array_size = rp.Pop<u32>();
+
+ ctx.WriteBuffer(&device_handle, sizeof(device_handle));
+
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called, array_size={}", array_size);
+
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(0);
+ }
+
+ void AttachActivateEvent(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const u64 dev_handle = rp.Pop<u64>();
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
+
+ IPC::ResponseBuilder rb{ctx, 2, 1};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushCopyObjects(activate_event);
+ }
+
+ void AttachDeactivateEvent(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const u64 dev_handle = rp.Pop<u64>();
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
+
+ IPC::ResponseBuilder rb{ctx, 2, 1};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushCopyObjects(deactivate_event);
+ }
+
+ void GetState(Kernel::HLERequestContext& ctx) {
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(static_cast<u32>(state));
+ }
+
+ void GetDeviceState(Kernel::HLERequestContext& ctx) {
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(static_cast<u32>(device_state));
+ }
+
+ void GetNpadId(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const u64 dev_handle = rp.Pop<u64>();
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(npad_id);
+ }
+
+ void AttachAvailabilityChangeEvent(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const u64 dev_handle = rp.Pop<u64>();
+ NGLOG_WARNING(Service_NFP, "(STUBBED) called, dev_handle=0x{:X}", dev_handle);
+
+ IPC::ResponseBuilder rb{ctx, 2, 1};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushCopyObjects(availability_change_event);
+ }
+
+ const u64 device_handle{0xDEAD};
+ const HID::ControllerID npad_id{HID::Controller_Player1};
+ State state{State::NonInitialized};
+ DeviceState device_state{DeviceState::Initialized};
+ Kernel::SharedPtr<Kernel::Event> activate_event;
+ Kernel::SharedPtr<Kernel::Event> deactivate_event;
+ Kernel::SharedPtr<Kernel::Event> availability_change_event;
};
void Module::Interface::CreateUserInterface(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/nifm/nifm.cpp b/src/core/hle/service/nifm/nifm.cpp
index eee92cfcd..62489c7fe 100644
--- a/src/core/hle/service/nifm/nifm.cpp
+++ b/src/core/hle/service/nifm/nifm.cpp
@@ -38,7 +38,7 @@ public:
{8, nullptr, "SetPriority"},
{9, nullptr, "SetNetworkProfileId"},
{10, nullptr, "SetRejectable"},
- {11, nullptr, "SetConnectionConfirmationOption"},
+ {11, &IRequest::SetConnectionConfirmationOption, "SetConnectionConfirmationOption"},
{12, nullptr, "SetPersistent"},
{13, nullptr, "SetInstant"},
{14, nullptr, "SetSustainable"},
@@ -67,23 +67,32 @@ private:
rb.Push(RESULT_SUCCESS);
rb.Push<u32>(0);
}
+
void GetResult(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
+
void GetSystemEventReadableHandles(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2, 2};
rb.Push(RESULT_SUCCESS);
rb.PushCopyObjects(event1, event2);
}
+
void Cancel(Kernel::HLERequestContext& ctx) {
NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
}
+ void SetConnectionConfirmationOption(Kernel::HLERequestContext& ctx) {
+ NGLOG_WARNING(Service_NIFM, "(STUBBED) called");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+
Kernel::SharedPtr<Kernel::Event> event1, event2;
};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index a9538ff43..0abc0de83 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -26,6 +26,10 @@ u32 nvhost_ctrl_gpu::ioctl(Ioctl command, const std::vector<u8>& input, std::vec
return ZCullGetInfo(input, output);
case IoctlCommand::IocZbcSetTable:
return ZBCSetTable(input, output);
+ case IoctlCommand::IocZbcQueryTable:
+ return ZBCQueryTable(input, output);
+ case IoctlCommand::IocFlushL2:
+ return FlushL2(input, output);
}
UNIMPLEMENTED_MSG("Unimplemented ioctl");
return 0;
@@ -136,4 +140,22 @@ u32 nvhost_ctrl_gpu::ZBCSetTable(const std::vector<u8>& input, std::vector<u8>&
return 0;
}
+u32 nvhost_ctrl_gpu::ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output) {
+ NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
+ IoctlZbcQueryTable params{};
+ std::memcpy(&params, input.data(), input.size());
+ // TODO : To implement properly
+ std::memcpy(output.data(), &params, output.size());
+ return 0;
+}
+
+u32 nvhost_ctrl_gpu::FlushL2(const std::vector<u8>& input, std::vector<u8>& output) {
+ NGLOG_WARNING(Service_NVDRV, "(STUBBED) called");
+ IoctlFlushL2 params{};
+ std::memcpy(&params, input.data(), input.size());
+ // TODO : To implement properly
+ std::memcpy(output.data(), &params, output.size());
+ return 0;
+}
+
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index 1d5ba2e67..f09113e67 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -26,6 +26,18 @@ private:
IocZcullGetCtxSizeCommand = 0x80044701,
IocZcullGetInfo = 0x80284702,
IocZbcSetTable = 0x402C4703,
+ IocZbcQueryTable = 0xC0344704,
+ IocFlushL2 = 0x40084707,
+ IocInvalICache = 0x4008470D,
+ IocSetMmudebugMode = 0x4008470E,
+ IocSetSmDebugMode = 0x4010470F,
+ IocWaitForPause = 0xC0084710,
+ IocGetTcpExceptionEnStatus = 0x80084711,
+ IocNumVsms = 0x80084712,
+ IocVsmsMapping = 0xC0044713,
+ IocGetErrorChannelUserData = 0xC008471B,
+ IocGetGpuTime = 0xC010471C,
+ IocGetCpuTimeCorrelationInfo = 0xC108471D,
};
struct IoctlGpuCharacteristics {
@@ -127,12 +139,31 @@ private:
};
static_assert(sizeof(IoctlZbcSetTable) == 44, "IoctlZbcSetTable is incorrect size");
+ struct IoctlZbcQueryTable {
+ u32_le color_ds[4];
+ u32_le color_l2[4];
+ u32_le depth;
+ u32_le ref_cnt;
+ u32_le format;
+ u32_le type;
+ u32_le index_size;
+ };
+ static_assert(sizeof(IoctlZbcQueryTable) == 52, "IoctlZbcQueryTable is incorrect size");
+
+ struct IoctlFlushL2 {
+ u32_le flush; // l2_flush | l2_invalidate << 1 | fb_flush << 2
+ u32_le reserved;
+ };
+ static_assert(sizeof(IoctlFlushL2) == 8, "IoctlFlushL2 is incorrect size");
+
u32 GetCharacteristics(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetTPCMasks(const std::vector<u8>& input, std::vector<u8>& output);
u32 GetActiveSlotMask(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetCtxSize(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZCullGetInfo(const std::vector<u8>& input, std::vector<u8>& output);
u32 ZBCSetTable(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 ZBCQueryTable(const std::vector<u8>& input, std::vector<u8>& output);
+ u32 FlushL2(const std::vector<u8>& input, std::vector<u8>& output);
};
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 79aab87f9..ed7b6dc03 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -121,8 +121,9 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector<
}
u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) {
- if (input.size() < sizeof(IoctlSubmitGpfifo))
+ if (input.size() < sizeof(IoctlSubmitGpfifo)) {
UNIMPLEMENTED();
+ }
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
NGLOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp
index 409fec470..bdd9eb5a5 100644
--- a/src/core/hle/service/service.cpp
+++ b/src/core/hle/service/service.cpp
@@ -26,6 +26,7 @@
#include "core/hle/service/friend/friend.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/lm/lm.h"
+#include "core/hle/service/mm/mm_u.h"
#include "core/hle/service/nfp/nfp.h"
#include "core/hle/service/nifm/nifm.h"
#include "core/hle/service/ns/ns.h"
@@ -191,6 +192,7 @@ void Init(std::shared_ptr<SM::ServiceManager>& sm) {
Friend::InstallInterfaces(*sm);
HID::InstallInterfaces(*sm);
LM::InstallInterfaces(*sm);
+ MM::InstallInterfaces(*sm);
NFP::InstallInterfaces(*sm);
NIFM::InstallInterfaces(*sm);
NS::InstallInterfaces(*sm);
diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp
index f0572bed6..baeecb0ec 100644
--- a/src/core/hle/service/set/set.cpp
+++ b/src/core/hle/service/set/set.cpp
@@ -12,9 +12,6 @@
namespace Service::Set {
void SET::GetAvailableLanguageCodes(Kernel::HLERequestContext& ctx) {
- IPC::RequestParser rp{ctx};
- u32 id = rp.Pop<u32>();
-
static constexpr std::array<LanguageCode, 17> available_language_codes = {{
LanguageCode::JA,
LanguageCode::EN_US,
@@ -50,7 +47,7 @@ SET::SET() : ServiceFramework("set") {
{2, nullptr, "MakeLanguageCode"},
{3, nullptr, "GetAvailableLanguageCodeCount"},
{4, nullptr, "GetRegionCode"},
- {5, nullptr, "GetAvailableLanguageCodes2"},
+ {5, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes2"},
{6, nullptr, "GetAvailableLanguageCodeCount2"},
{7, nullptr, "GetKeyCodeMap"},
{8, nullptr, "GetQuestFlag"},
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 6a4fd38cb..20cc0bac0 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -9,6 +9,7 @@
#include "core/hle/kernel/process.h"
#include "core/loader/deconstructed_rom_directory.h"
#include "core/loader/elf.h"
+#include "core/loader/nca.h"
#include "core/loader/nro.h"
#include "core/loader/nso.h"
@@ -32,6 +33,7 @@ FileType IdentifyFile(FileUtil::IOFile& file, const std::string& filepath) {
CHECK_TYPE(ELF)
CHECK_TYPE(NSO)
CHECK_TYPE(NRO)
+ CHECK_TYPE(NCA)
#undef CHECK_TYPE
@@ -57,6 +59,8 @@ FileType GuessFromExtension(const std::string& extension_) {
return FileType::NRO;
else if (extension == ".nso")
return FileType::NSO;
+ else if (extension == ".nca")
+ return FileType::NCA;
return FileType::Unknown;
}
@@ -69,6 +73,8 @@ const char* GetFileTypeString(FileType type) {
return "NRO";
case FileType::NSO:
return "NSO";
+ case FileType::NCA:
+ return "NCA";
case FileType::DeconstructedRomDirectory:
return "Directory";
case FileType::Error:
@@ -104,6 +110,10 @@ static std::unique_ptr<AppLoader> GetFileLoader(FileUtil::IOFile&& file, FileTyp
case FileType::NRO:
return std::make_unique<AppLoader_NRO>(std::move(file), filepath);
+ // NX NCA file format.
+ case FileType::NCA:
+ return std::make_unique<AppLoader_NCA>(std::move(file), filepath);
+
// NX deconstructed ROM directory.
case FileType::DeconstructedRomDirectory:
return std::make_unique<AppLoader_DeconstructedRomDirectory>(std::move(file), filepath);
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index b1aabb1cb..b76f7b13d 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -29,6 +29,7 @@ enum class FileType {
ELF,
NSO,
NRO,
+ NCA,
DeconstructedRomDirectory,
};
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
new file mode 100644
index 000000000..067945d46
--- /dev/null
+++ b/src/core/loader/nca.cpp
@@ -0,0 +1,303 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <vector>
+
+#include "common/common_funcs.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+#include "common/swap.h"
+#include "core/core.h"
+#include "core/file_sys/program_metadata.h"
+#include "core/file_sys/romfs_factory.h"
+#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/resource_limit.h"
+#include "core/hle/service/filesystem/filesystem.h"
+#include "core/loader/nca.h"
+#include "core/loader/nso.h"
+#include "core/memory.h"
+
+namespace Loader {
+
+// Media offsets in headers are stored divided by 512. Mult. by this to get real offset.
+constexpr u64 MEDIA_OFFSET_MULTIPLIER = 0x200;
+
+constexpr u64 SECTION_HEADER_SIZE = 0x200;
+constexpr u64 SECTION_HEADER_OFFSET = 0x400;
+
+enum class NcaContentType : u8 { Program = 0, Meta = 1, Control = 2, Manual = 3, Data = 4 };
+
+enum class NcaSectionFilesystemType : u8 { PFS0 = 0x2, ROMFS = 0x3 };
+
+struct NcaSectionTableEntry {
+ u32_le media_offset;
+ u32_le media_end_offset;
+ INSERT_PADDING_BYTES(0x8);
+};
+static_assert(sizeof(NcaSectionTableEntry) == 0x10, "NcaSectionTableEntry has incorrect size.");
+
+struct NcaHeader {
+ std::array<u8, 0x100> rsa_signature_1;
+ std::array<u8, 0x100> rsa_signature_2;
+ u32_le magic;
+ u8 is_system;
+ NcaContentType content_type;
+ u8 crypto_type;
+ u8 key_index;
+ u64_le size;
+ u64_le title_id;
+ INSERT_PADDING_BYTES(0x4);
+ u32_le sdk_version;
+ u8 crypto_type_2;
+ INSERT_PADDING_BYTES(15);
+ std::array<u8, 0x10> rights_id;
+ std::array<NcaSectionTableEntry, 0x4> section_tables;
+ std::array<std::array<u8, 0x20>, 0x4> hash_tables;
+ std::array<std::array<u8, 0x10>, 0x4> key_area;
+ INSERT_PADDING_BYTES(0xC0);
+};
+static_assert(sizeof(NcaHeader) == 0x400, "NcaHeader has incorrect size.");
+
+struct NcaSectionHeaderBlock {
+ INSERT_PADDING_BYTES(3);
+ NcaSectionFilesystemType filesystem_type;
+ u8 crypto_type;
+ INSERT_PADDING_BYTES(3);
+};
+static_assert(sizeof(NcaSectionHeaderBlock) == 0x8, "NcaSectionHeaderBlock has incorrect size.");
+
+struct Pfs0Superblock {
+ NcaSectionHeaderBlock header_block;
+ std::array<u8, 0x20> hash;
+ u32_le size;
+ INSERT_PADDING_BYTES(4);
+ u64_le hash_table_offset;
+ u64_le hash_table_size;
+ u64_le pfs0_header_offset;
+ u64_le pfs0_size;
+ INSERT_PADDING_BYTES(432);
+};
+static_assert(sizeof(Pfs0Superblock) == 0x200, "Pfs0Superblock has incorrect size.");
+
+static bool IsValidNca(const NcaHeader& header) {
+ return header.magic == Common::MakeMagic('N', 'C', 'A', '2') ||
+ header.magic == Common::MakeMagic('N', 'C', 'A', '3');
+}
+
+// TODO(DarkLordZach): Add support for encrypted.
+class Nca final {
+ std::vector<FileSys::PartitionFilesystem> pfs;
+ std::vector<u64> pfs_offset;
+
+ u64 romfs_offset = 0;
+ u64 romfs_size = 0;
+
+ boost::optional<u8> exefs_id = boost::none;
+
+ FileUtil::IOFile file;
+ std::string path;
+
+ u64 GetExeFsFileOffset(const std::string& file_name) const;
+ u64 GetExeFsFileSize(const std::string& file_name) const;
+
+public:
+ ResultStatus Load(FileUtil::IOFile&& file, std::string path);
+
+ FileSys::PartitionFilesystem GetPfs(u8 id) const;
+
+ u64 GetRomFsOffset() const;
+ u64 GetRomFsSize() const;
+
+ std::vector<u8> GetExeFsFile(const std::string& file_name);
+};
+
+static bool IsPfsExeFs(const FileSys::PartitionFilesystem& pfs) {
+ // According to switchbrew, an exefs must only contain these two files:
+ return pfs.GetFileSize("main") > 0 && pfs.GetFileSize("main.npdm") > 0;
+}
+
+ResultStatus Nca::Load(FileUtil::IOFile&& in_file, std::string in_path) {
+ file = std::move(in_file);
+ path = in_path;
+ file.Seek(0, SEEK_SET);
+ std::array<u8, sizeof(NcaHeader)> header_array{};
+ if (sizeof(NcaHeader) != file.ReadBytes(header_array.data(), sizeof(NcaHeader)))
+ NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
+
+ NcaHeader header{};
+ std::memcpy(&header, header_array.data(), sizeof(NcaHeader));
+ if (!IsValidNca(header))
+ return ResultStatus::ErrorInvalidFormat;
+
+ int number_sections =
+ std::count_if(std::begin(header.section_tables), std::end(header.section_tables),
+ [](NcaSectionTableEntry entry) { return entry.media_offset > 0; });
+
+ for (int i = 0; i < number_sections; ++i) {
+ // Seek to beginning of this section.
+ file.Seek(SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE, SEEK_SET);
+ std::array<u8, sizeof(NcaSectionHeaderBlock)> array{};
+ if (sizeof(NcaSectionHeaderBlock) !=
+ file.ReadBytes(array.data(), sizeof(NcaSectionHeaderBlock)))
+ NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
+
+ NcaSectionHeaderBlock block{};
+ std::memcpy(&block, array.data(), sizeof(NcaSectionHeaderBlock));
+
+ if (block.filesystem_type == NcaSectionFilesystemType::ROMFS) {
+ romfs_offset = header.section_tables[i].media_offset * MEDIA_OFFSET_MULTIPLIER;
+ romfs_size =
+ header.section_tables[i].media_end_offset * MEDIA_OFFSET_MULTIPLIER - romfs_offset;
+ } else if (block.filesystem_type == NcaSectionFilesystemType::PFS0) {
+ Pfs0Superblock sb{};
+ // Seek back to beginning of this section.
+ file.Seek(SECTION_HEADER_OFFSET + i * SECTION_HEADER_SIZE, SEEK_SET);
+ if (sizeof(Pfs0Superblock) != file.ReadBytes(&sb, sizeof(Pfs0Superblock)))
+ NGLOG_CRITICAL(Loader, "File reader errored out during header read.");
+
+ u64 offset = (static_cast<u64>(header.section_tables[i].media_offset) *
+ MEDIA_OFFSET_MULTIPLIER) +
+ sb.pfs0_header_offset;
+ FileSys::PartitionFilesystem npfs{};
+ ResultStatus status = npfs.Load(path, offset);
+
+ if (status == ResultStatus::Success) {
+ pfs.emplace_back(std::move(npfs));
+ pfs_offset.emplace_back(offset);
+ }
+ }
+ }
+
+ for (size_t i = 0; i < pfs.size(); ++i) {
+ if (IsPfsExeFs(pfs[i]))
+ exefs_id = i;
+ }
+
+ return ResultStatus::Success;
+}
+
+FileSys::PartitionFilesystem Nca::GetPfs(u8 id) const {
+ return pfs[id];
+}
+
+u64 Nca::GetExeFsFileOffset(const std::string& file_name) const {
+ if (exefs_id == boost::none)
+ return 0;
+ return pfs[*exefs_id].GetFileOffset(file_name) + pfs_offset[*exefs_id];
+}
+
+u64 Nca::GetExeFsFileSize(const std::string& file_name) const {
+ if (exefs_id == boost::none)
+ return 0;
+ return pfs[*exefs_id].GetFileSize(file_name);
+}
+
+u64 Nca::GetRomFsOffset() const {
+ return romfs_offset;
+}
+
+u64 Nca::GetRomFsSize() const {
+ return romfs_size;
+}
+
+std::vector<u8> Nca::GetExeFsFile(const std::string& file_name) {
+ std::vector<u8> out(GetExeFsFileSize(file_name));
+ file.Seek(GetExeFsFileOffset(file_name), SEEK_SET);
+ file.ReadBytes(out.data(), GetExeFsFileSize(file_name));
+ return out;
+}
+
+AppLoader_NCA::AppLoader_NCA(FileUtil::IOFile&& file, std::string filepath)
+ : AppLoader(std::move(file)), filepath(std::move(filepath)) {}
+
+FileType AppLoader_NCA::IdentifyType(FileUtil::IOFile& file, const std::string&) {
+ file.Seek(0, SEEK_SET);
+ std::array<u8, 0x400> header_enc_array{};
+ if (0x400 != file.ReadBytes(header_enc_array.data(), 0x400))
+ return FileType::Error;
+
+ // TODO(DarkLordZach): Assuming everything is decrypted. Add crypto support.
+ NcaHeader header{};
+ std::memcpy(&header, header_enc_array.data(), sizeof(NcaHeader));
+
+ if (IsValidNca(header) && header.content_type == NcaContentType::Program)
+ return FileType::NCA;
+
+ return FileType::Error;
+}
+
+ResultStatus AppLoader_NCA::Load(Kernel::SharedPtr<Kernel::Process>& process) {
+ if (is_loaded) {
+ return ResultStatus::ErrorAlreadyLoaded;
+ }
+ if (!file.IsOpen()) {
+ return ResultStatus::Error;
+ }
+
+ nca = std::make_unique<Nca>();
+ ResultStatus result = nca->Load(std::move(file), filepath);
+ if (result != ResultStatus::Success) {
+ return result;
+ }
+
+ result = metadata.Load(nca->GetExeFsFile("main.npdm"));
+ if (result != ResultStatus::Success) {
+ return result;
+ }
+ metadata.Print();
+
+ const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
+ if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit) {
+ return ResultStatus::ErrorUnsupportedArch;
+ }
+
+ VAddr next_load_addr{Memory::PROCESS_IMAGE_VADDR};
+ for (const auto& module : {"rtld", "main", "subsdk0", "subsdk1", "subsdk2", "subsdk3",
+ "subsdk4", "subsdk5", "subsdk6", "subsdk7", "sdk"}) {
+ const VAddr load_addr = next_load_addr;
+ next_load_addr = AppLoader_NSO::LoadModule(module, nca->GetExeFsFile(module), load_addr);
+ if (next_load_addr) {
+ NGLOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", module, load_addr);
+ } else {
+ next_load_addr = load_addr;
+ }
+ }
+
+ process->program_id = metadata.GetTitleID();
+ process->svc_access_mask.set();
+ process->address_mappings = default_address_mappings;
+ process->resource_limit =
+ Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION);
+ process->Run(Memory::PROCESS_IMAGE_VADDR, metadata.GetMainThreadPriority(),
+ metadata.GetMainThreadStackSize());
+
+ if (nca->GetRomFsSize() > 0)
+ Service::FileSystem::RegisterFileSystem(std::make_unique<FileSys::RomFS_Factory>(*this),
+ Service::FileSystem::Type::RomFS);
+
+ is_loaded = true;
+ return ResultStatus::Success;
+}
+
+ResultStatus AppLoader_NCA::ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file, u64& offset,
+ u64& size) {
+ if (nca->GetRomFsSize() == 0) {
+ NGLOG_DEBUG(Loader, "No RomFS available");
+ return ResultStatus::ErrorNotUsed;
+ }
+
+ romfs_file = std::make_shared<FileUtil::IOFile>(filepath, "rb");
+
+ offset = nca->GetRomFsOffset();
+ size = nca->GetRomFsSize();
+
+ NGLOG_DEBUG(Loader, "RomFS offset: 0x{:016X}", offset);
+ NGLOG_DEBUG(Loader, "RomFS size: 0x{:016X}", size);
+
+ return ResultStatus::Success;
+}
+
+AppLoader_NCA::~AppLoader_NCA() = default;
+
+} // namespace Loader
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
new file mode 100644
index 000000000..3b6c451d0
--- /dev/null
+++ b/src/core/loader/nca.h
@@ -0,0 +1,49 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include "common/common_types.h"
+#include "core/file_sys/partition_filesystem.h"
+#include "core/file_sys/program_metadata.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/loader/loader.h"
+
+namespace Loader {
+
+class Nca;
+
+/// Loads an NCA file
+class AppLoader_NCA final : public AppLoader {
+public:
+ AppLoader_NCA(FileUtil::IOFile&& file, std::string filepath);
+
+ /**
+ * Returns the type of the file
+ * @param file FileUtil::IOFile open file
+ * @param filepath Path of the file that we are opening.
+ * @return FileType found, or FileType::Error if this loader doesn't know it
+ */
+ static FileType IdentifyType(FileUtil::IOFile& file, const std::string& filepath);
+
+ FileType GetFileType() override {
+ return IdentifyType(file, filepath);
+ }
+
+ ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
+
+ ResultStatus ReadRomFS(std::shared_ptr<FileUtil::IOFile>& romfs_file, u64& offset,
+ u64& size) override;
+
+ ~AppLoader_NCA();
+
+private:
+ std::string filepath;
+ FileSys::ProgramMetadata metadata;
+
+ std::unique_ptr<Nca> nca;
+};
+
+} // namespace Loader
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index 01be9e217..845ed7e90 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -66,8 +66,22 @@ FileType AppLoader_NSO::IdentifyType(FileUtil::IOFile& file, const std::string&)
return FileType::Error;
}
+static std::vector<u8> DecompressSegment(const std::vector<u8>& compressed_data,
+ const NsoSegmentHeader& header) {
+ std::vector<u8> uncompressed_data;
+ uncompressed_data.resize(header.size);
+ const int bytes_uncompressed = LZ4_decompress_safe(
+ reinterpret_cast<const char*>(compressed_data.data()),
+ reinterpret_cast<char*>(uncompressed_data.data()), compressed_data.size(), header.size);
+
+ ASSERT_MSG(bytes_uncompressed == header.size && bytes_uncompressed == uncompressed_data.size(),
+ "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
+
+ return uncompressed_data;
+}
+
static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeader& header,
- int compressed_size) {
+ size_t compressed_size) {
std::vector<u8> compressed_data;
compressed_data.resize(compressed_size);
@@ -77,22 +91,65 @@ static std::vector<u8> ReadSegment(FileUtil::IOFile& file, const NsoSegmentHeade
return {};
}
- std::vector<u8> uncompressed_data;
- uncompressed_data.resize(header.size);
- const int bytes_uncompressed = LZ4_decompress_safe(
- reinterpret_cast<const char*>(compressed_data.data()),
- reinterpret_cast<char*>(uncompressed_data.data()), compressed_size, header.size);
-
- ASSERT_MSG(bytes_uncompressed == header.size && bytes_uncompressed == uncompressed_data.size(),
- "{} != {} != {}", bytes_uncompressed, header.size, uncompressed_data.size());
-
- return uncompressed_data;
+ return DecompressSegment(compressed_data, header);
}
static constexpr u32 PageAlignSize(u32 size) {
return (size + Memory::PAGE_MASK) & ~Memory::PAGE_MASK;
}
+VAddr AppLoader_NSO::LoadModule(const std::string& name, const std::vector<u8>& file_data,
+ VAddr load_base) {
+ if (file_data.size() < sizeof(NsoHeader))
+ return {};
+
+ NsoHeader nso_header;
+ std::memcpy(&nso_header, file_data.data(), sizeof(NsoHeader));
+
+ if (nso_header.magic != Common::MakeMagic('N', 'S', 'O', '0'))
+ return {};
+
+ // Build program image
+ Kernel::SharedPtr<Kernel::CodeSet> codeset = Kernel::CodeSet::Create("");
+ std::vector<u8> program_image;
+ for (int i = 0; i < nso_header.segments.size(); ++i) {
+ std::vector<u8> compressed_data(nso_header.segments_compressed_size[i]);
+ for (int j = 0; j < nso_header.segments_compressed_size[i]; ++j)
+ compressed_data[j] = file_data[nso_header.segments[i].offset + j];
+ std::vector<u8> data = DecompressSegment(compressed_data, nso_header.segments[i]);
+ program_image.resize(nso_header.segments[i].location);
+ program_image.insert(program_image.end(), data.begin(), data.end());
+ codeset->segments[i].addr = nso_header.segments[i].location;
+ codeset->segments[i].offset = nso_header.segments[i].location;
+ codeset->segments[i].size = PageAlignSize(static_cast<u32>(data.size()));
+ }
+
+ // MOD header pointer is at .text offset + 4
+ u32 module_offset;
+ std::memcpy(&module_offset, program_image.data() + 4, sizeof(u32));
+
+ // Read MOD header
+ ModHeader mod_header{};
+ // Default .bss to size in segment header if MOD0 section doesn't exist
+ u32 bss_size{PageAlignSize(nso_header.segments[2].bss_size)};
+ std::memcpy(&mod_header, program_image.data() + module_offset, sizeof(ModHeader));
+ const bool has_mod_header{mod_header.magic == Common::MakeMagic('M', 'O', 'D', '0')};
+ if (has_mod_header) {
+ // Resize program image to include .bss section and page align each section
+ bss_size = PageAlignSize(mod_header.bss_end_offset - mod_header.bss_start_offset);
+ }
+ codeset->data.size += bss_size;
+ const u32 image_size{PageAlignSize(static_cast<u32>(program_image.size()) + bss_size)};
+ program_image.resize(image_size);
+
+ // Load codeset for current process
+ codeset->name = name;
+ codeset->memory = std::make_shared<std::vector<u8>>(std::move(program_image));
+ Core::CurrentProcess()->LoadModule(codeset, load_base);
+
+ return load_base + image_size;
+}
+
VAddr AppLoader_NSO::LoadModule(const std::string& path, VAddr load_base) {
FileUtil::IOFile file(path, "rb");
if (!file.IsOpen()) {
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 1ae30a824..386f4d39a 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -29,6 +29,9 @@ public:
return IdentifyType(file, filepath);
}
+ static VAddr LoadModule(const std::string& name, const std::vector<u8>& file_data,
+ VAddr load_base);
+
static VAddr LoadModule(const std::string& path, VAddr load_base);
ResultStatus Load(Kernel::SharedPtr<Kernel::Process>& process) override;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 3b81acd63..f070dee7d 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -241,6 +241,10 @@ bool IsValidVirtualAddress(const VAddr vaddr) {
return IsValidVirtualAddress(*Core::CurrentProcess(), vaddr);
}
+bool IsKernelVirtualAddress(const VAddr vaddr) {
+ return KERNEL_REGION_VADDR <= vaddr && vaddr < KERNEL_REGION_END;
+}
+
bool IsValidPhysicalAddress(const PAddr paddr) {
return GetPhysicalPointer(paddr) != nullptr;
}
diff --git a/src/core/memory.h b/src/core/memory.h
index 3f56a2c6a..8d5d017a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -188,6 +188,11 @@ enum : VAddr {
MAP_REGION_VADDR = NEW_MAP_REGION_VADDR_END,
MAP_REGION_SIZE = 0x1000000000,
MAP_REGION_VADDR_END = MAP_REGION_VADDR + MAP_REGION_SIZE,
+
+ /// Kernel Virtual Address Range
+ KERNEL_REGION_VADDR = 0xFFFFFF8000000000,
+ KERNEL_REGION_SIZE = 0x7FFFE00000,
+ KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
};
/// Currently active page table
@@ -197,6 +202,8 @@ PageTable* GetCurrentPageTable();
/// Determines if the given VAddr is valid for the specified process.
bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr);
bool IsValidVirtualAddress(const VAddr addr);
+/// Determines if the given VAddr is a kernel address
+bool IsKernelVirtualAddress(const VAddr addr);
bool IsValidPhysicalAddress(const PAddr addr);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 281810357..c6431e722 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -9,6 +9,8 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_compute.cpp
engines/maxwell_compute.h
+ engines/maxwell_dma.cpp
+ engines/maxwell_dma.h
engines/shader_bytecode.h
gpu.cpp
gpu.h
@@ -39,6 +41,8 @@ add_library(video_core STATIC
renderer_opengl/maxwell_to_gl.h
renderer_opengl/renderer_opengl.cpp
renderer_opengl/renderer_opengl.h
+ textures/astc.cpp
+ textures/astc.h
textures/decoders.cpp
textures/decoders.h
textures/texture.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d72d6f760..cec9cb9f3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -16,6 +16,7 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -60,8 +61,11 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
case EngineID::MAXWELL_COMPUTE_B:
maxwell_compute->WriteReg(method, value);
break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ maxwell_dma->WriteReg(method, value);
+ break;
default:
- UNIMPLEMENTED();
+ UNIMPLEMENTED_MSG("Unimplemented engine");
}
}
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 6b9382f06..998b7c843 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -47,6 +47,7 @@ void Fermi2D::HandleSurfaceCopy() {
if (regs.src.linear == regs.dst.linear) {
// If the input layout and the output layout are the same, just perform a raw copy.
+ ASSERT(regs.src.BlockHeight() == regs.dst.BlockHeight());
Memory::CopyBlock(dest_cpu, source_cpu,
src_bytes_per_pixel * regs.dst.width * regs.dst.height);
return;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index ef12d9300..93c43c8cb 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -328,8 +328,9 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
Texture::FullTextureInfo tex_info{};
// TODO(Subv): Use the shader to determine which textures are actually accessed.
- tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) /
- sizeof(Texture::TextureHandle);
+ tex_info.index =
+ static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
+ sizeof(Texture::TextureHandle);
// Load the TIC data.
if (tex_handle.tic_id != 0) {
@@ -354,6 +355,40 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
return textures;
}
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage, size_t offset) const {
+ auto& shader = state.shader_stages[static_cast<size_t>(stage)];
+ auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
+ ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
+
+ GPUVAddr tex_info_address = tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+
+ ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
+
+ boost::optional<VAddr> tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address);
+ Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
+
+ Texture::FullTextureInfo tex_info{};
+ tex_info.index = static_cast<u32>(offset);
+
+ // Load the TIC data.
+ if (tex_handle.tic_id != 0) {
+ tex_info.enabled = true;
+
+ auto tic_entry = GetTICEntry(tex_handle.tic_id);
+ // TODO(Subv): Workaround for BitField's move constructor being deleted.
+ std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
+ }
+
+ // Load the TSC data
+ if (tex_handle.tsc_id != 0) {
+ auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
+ // TODO(Subv): Workaround for BitField's move constructor being deleted.
+ std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
+ }
+
+ return tex_info;
+}
+
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 245410c95..2dc251205 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -318,6 +318,7 @@ public:
Equation equation_a;
Factor factor_source_a;
Factor factor_dest_a;
+ INSERT_PADDING_WORDS(1);
};
union {
@@ -432,7 +433,27 @@ public:
};
} rt_control;
- INSERT_PADDING_WORDS(0xCF);
+ INSERT_PADDING_WORDS(0x31);
+
+ u32 independent_blend_enable;
+
+ INSERT_PADDING_WORDS(0x15);
+
+ struct {
+ u32 separate_alpha;
+ Blend::Equation equation_rgb;
+ Blend::Factor factor_source_rgb;
+ Blend::Factor factor_dest_rgb;
+ Blend::Equation equation_a;
+ Blend::Factor factor_source_a;
+ INSERT_PADDING_WORDS(1);
+ Blend::Factor factor_dest_a;
+
+ u32 enable_common;
+ u32 enable[NumRenderTargets];
+ } blend;
+
+ INSERT_PADDING_WORDS(0x77);
struct {
u32 tsc_address_high;
@@ -557,9 +578,7 @@ public:
} vertex_array[NumVertexArrays];
- Blend blend;
-
- INSERT_PADDING_WORDS(0x39);
+ Blend independent_blend[NumRenderTargets];
struct {
u32 limit_high;
@@ -664,6 +683,9 @@ public:
/// Returns a list of enabled textures for the specified shader stage.
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
+ /// Returns the texture information for a specific texture in a specific shader stage.
+ Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
+
/// Returns whether the specified shader stage is enabled or not.
bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
@@ -719,6 +741,8 @@ ASSERT_REG_POSITION(vertex_buffer, 0x35D);
ASSERT_REG_POSITION(zeta, 0x3F8);
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
+ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
+ASSERT_REG_POSITION(blend, 0x4CF);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_address, 0x582);
@@ -726,7 +750,7 @@ ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
ASSERT_REG_POSITION(query, 0x6C0);
ASSERT_REG_POSITION(vertex_array[0], 0x700);
-ASSERT_REG_POSITION(blend, 0x780);
+ASSERT_REG_POSITION(independent_blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
new file mode 100644
index 000000000..442138988
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -0,0 +1,69 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/memory.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/textures/decoders.h"
+
+namespace Tegra {
+namespace Engines {
+
+MaxwellDMA::MaxwellDMA(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+
+void MaxwellDMA::WriteReg(u32 method, u32 value) {
+ ASSERT_MSG(method < Regs::NUM_REGS,
+ "Invalid MaxwellDMA register, increase the size of the Regs structure");
+
+ regs.reg_array[method] = value;
+
+#define MAXWELLDMA_REG_INDEX(field_name) \
+ (offsetof(Tegra::Engines::MaxwellDMA::Regs, field_name) / sizeof(u32))
+
+ switch (method) {
+ case MAXWELLDMA_REG_INDEX(exec): {
+ HandleCopy();
+ break;
+ }
+ }
+
+#undef MAXWELLDMA_REG_INDEX
+}
+
+void MaxwellDMA::HandleCopy() {
+ NGLOG_WARNING(HW_GPU, "Requested a DMA copy");
+
+ const GPUVAddr source = regs.src_address.Address();
+ const GPUVAddr dest = regs.dst_address.Address();
+
+ const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
+ const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+
+ // TODO(Subv): Perform more research and implement all features of this engine.
+ ASSERT(regs.exec.enable_swizzle == 0);
+ ASSERT(regs.exec.enable_2d == 1);
+ ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
+ ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
+ ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
+ ASSERT(regs.src_params.pos_x == 0);
+ ASSERT(regs.src_params.pos_y == 0);
+ ASSERT(regs.dst_params.pos_x == 0);
+ ASSERT(regs.dst_params.pos_y == 0);
+ ASSERT(regs.exec.is_dst_linear != regs.exec.is_src_linear);
+
+ u8* src_buffer = Memory::GetPointer(source_cpu);
+ u8* dst_buffer = Memory::GetPointer(dest_cpu);
+
+ if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
+ // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+ Texture::CopySwizzledData(regs.src_params.size_x, regs.src_params.size_y, 1, 1, src_buffer,
+ dst_buffer, true, regs.src_params.BlockHeight());
+ } else {
+ // If the input is linear and the output is tiled, swizzle the input and copy it over.
+ Texture::CopySwizzledData(regs.dst_params.size_x, regs.dst_params.size_y, 1, 1, dst_buffer,
+ src_buffer, false, regs.dst_params.BlockHeight());
+ }
+}
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
new file mode 100644
index 000000000..905749bde
--- /dev/null
+++ b/src/video_core/engines/maxwell_dma.h
@@ -0,0 +1,155 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include "common/assert.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra {
+namespace Engines {
+
+class MaxwellDMA final {
+public:
+ explicit MaxwellDMA(MemoryManager& memory_manager);
+ ~MaxwellDMA() = default;
+
+ /// Write the value to the register identified by method.
+ void WriteReg(u32 method, u32 value);
+
+ struct Regs {
+ static constexpr size_t NUM_REGS = 0x1D6;
+
+ struct Parameters {
+ union {
+ BitField<0, 4, u32> block_depth;
+ BitField<4, 4, u32> block_height;
+ BitField<8, 4, u32> block_width;
+ };
+ u32 size_x;
+ u32 size_y;
+ u32 size_z;
+ u32 pos_z;
+ union {
+ BitField<0, 16, u32> pos_x;
+ BitField<16, 16, u32> pos_y;
+ };
+
+ u32 BlockHeight() const {
+ return 1 << block_height;
+ }
+ };
+
+ static_assert(sizeof(Parameters) == 24, "Parameters has wrong size");
+
+ enum class CopyMode : u32 {
+ None = 0,
+ Unk1 = 1,
+ Unk2 = 2,
+ };
+
+ enum class QueryMode : u32 {
+ None = 0,
+ Short = 1,
+ Long = 2,
+ };
+
+ enum class QueryIntr : u32 {
+ None = 0,
+ Block = 1,
+ NonBlock = 2,
+ };
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS(0xC0);
+
+ struct {
+ union {
+ BitField<0, 2, CopyMode> copy_mode;
+ BitField<2, 1, u32> flush;
+
+ BitField<3, 2, QueryMode> query_mode;
+ BitField<5, 2, QueryIntr> query_intr;
+
+ BitField<7, 1, u32> is_src_linear;
+ BitField<8, 1, u32> is_dst_linear;
+
+ BitField<9, 1, u32> enable_2d;
+ BitField<10, 1, u32> enable_swizzle;
+ };
+ } exec;
+
+ INSERT_PADDING_WORDS(0x3F);
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } src_address;
+
+ struct {
+ u32 address_high;
+ u32 address_low;
+
+ GPUVAddr Address() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } dst_address;
+
+ u32 src_pitch;
+ u32 dst_pitch;
+ u32 x_count;
+ u32 y_count;
+
+ INSERT_PADDING_WORDS(0xBB);
+
+ Parameters dst_params;
+
+ INSERT_PADDING_WORDS(1);
+
+ Parameters src_params;
+
+ INSERT_PADDING_WORDS(0x13);
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
+
+ MemoryManager& memory_manager;
+
+private:
+ /// Performs the copy from the source buffer to the destination buffer as configured in the
+ /// registers.
+ void HandleCopy();
+};
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(MaxwellDMA::Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(exec, 0xC0);
+ASSERT_REG_POSITION(src_address, 0x100);
+ASSERT_REG_POSITION(dst_address, 0x102);
+ASSERT_REG_POSITION(src_pitch, 0x104);
+ASSERT_REG_POSITION(dst_pitch, 0x105);
+ASSERT_REG_POSITION(x_count, 0x106);
+ASSERT_REG_POSITION(y_count, 0x107);
+ASSERT_REG_POSITION(dst_params, 0x1C3);
+ASSERT_REG_POSITION(src_params, 0x1CA);
+
+#undef ASSERT_REG_POSITION
+
+} // namespace Engines
+} // namespace Tegra
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 38757c038..cb4db0679 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -109,11 +109,6 @@ union Sampler {
u64 value{};
};
-union Uniform {
- BitField<20, 14, u64> offset;
- BitField<34, 5, u64> index;
-};
-
} // namespace Shader
} // namespace Tegra
@@ -173,6 +168,31 @@ enum class SubOp : u64 {
Min = 0x8,
};
+enum class F2iRoundingOp : u64 {
+ None = 0,
+ Floor = 1,
+ Ceil = 2,
+ Trunc = 3,
+};
+
+enum class F2fRoundingOp : u64 {
+ None = 0,
+ Pass = 3,
+ Round = 8,
+ Floor = 9,
+ Ceil = 10,
+ Trunc = 11,
+};
+
+enum class UniformType : u64 {
+ UnsignedByte = 0,
+ SignedByte = 1,
+ UnsignedShort = 2,
+ SignedShort = 3,
+ Single = 4,
+ Double = 5,
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -196,12 +216,12 @@ union Instruction {
union {
BitField<20, 19, u64> imm20_19;
- BitField<20, 32, u64> imm20_32;
+ BitField<20, 32, s64> imm20_32;
BitField<45, 1, u64> negate_b;
BitField<46, 1, u64> abs_a;
BitField<48, 1, u64> negate_a;
BitField<49, 1, u64> abs_b;
- BitField<50, 1, u64> abs_d;
+ BitField<50, 1, u64> saturate_d;
BitField<56, 1, u64> negate_imm;
union {
@@ -210,10 +230,18 @@ union Instruction {
} fmnmx;
union {
+ BitField<39, 1, u64> invert_a;
+ BitField<40, 1, u64> invert_b;
+ BitField<41, 2, LogicOperation> operation;
+ BitField<44, 2, u64> unk44;
+ BitField<48, 3, Pred> pred48;
+ } lop;
+
+ union {
BitField<53, 2, LogicOperation> operation;
BitField<55, 1, u64> invert_a;
BitField<56, 1, u64> invert_b;
- } lop;
+ } lop32i;
float GetImm20_19() const {
float result{};
@@ -226,7 +254,7 @@ union Instruction {
float GetImm20_32() const {
float result{};
- u32 imm{static_cast<u32>(imm20_32)};
+ s32 imm{static_cast<s32>(imm20_32)};
std::memcpy(&result, &imm, sizeof(imm));
return result;
}
@@ -240,10 +268,30 @@ union Instruction {
} alu;
union {
+ BitField<48, 1, u64> is_signed;
+ } shift;
+
+ union {
BitField<39, 5, u64> shift_amount;
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_a;
- } iscadd;
+ } alu_integer;
+
+ union {
+ BitField<54, 1, u64> saturate;
+ BitField<56, 1, u64> negate_a;
+ } iadd32i;
+
+ union {
+ BitField<20, 8, u64> shift_position;
+ BitField<28, 8, u64> shift_length;
+ BitField<48, 1, u64> negate_b;
+ BitField<49, 1, u64> negate_a;
+
+ u64 GetLeftShiftValue() const {
+ return 32 - (shift_position + shift_length);
+ }
+ } bfe;
union {
BitField<48, 1, u64> negate_b;
@@ -251,6 +299,11 @@ union Instruction {
} ffma;
union {
+ BitField<48, 3, UniformType> type;
+ BitField<44, 2, u64> unknown;
+ } ld_c;
+
+ union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
BitField<7, 1, u64> abs_a;
@@ -289,19 +342,37 @@ union Instruction {
} fset;
union {
- BitField<10, 2, Register::Size> size;
- BitField<13, 1, u64> is_signed;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred;
+ BitField<44, 1, u64> bf;
+ BitField<45, 2, PredOperation> op;
+ BitField<48, 1, u64> is_signed;
+ BitField<49, 3, PredCondition> cond;
+ } iset;
+
+ union {
+ BitField<8, 2, Register::Size> dest_size;
+ BitField<10, 2, Register::Size> src_size;
+ BitField<12, 1, u64> is_output_signed;
+ BitField<13, 1, u64> is_input_signed;
BitField<41, 2, u64> selector;
BitField<45, 1, u64> negate_a;
BitField<49, 1, u64> abs_a;
- BitField<50, 1, u64> saturate_a;
+
+ union {
+ BitField<39, 2, F2iRoundingOp> rounding;
+ } f2i;
+
+ union {
+ BitField<39, 4, F2fRoundingOp> rounding;
+ } f2f;
} conversion;
union {
BitField<31, 4, u64> component_mask;
bool IsComponentEnabled(size_t component) const {
- return ((1 << component) & component_mask) != 0;
+ return ((1ull << component) & component_mask) != 0;
}
} tex;
@@ -320,7 +391,7 @@ union Instruction {
ASSERT(component_mask_selector < mask.size());
- return ((1 << component) & mask[component_mask_selector]) != 0;
+ return ((1ull << component) & mask[component_mask_selector]) != 0;
}
} texs;
@@ -338,12 +409,21 @@ union Instruction {
}
} bra;
+ union {
+ BitField<20, 14, u64> offset;
+ BitField<34, 5, u64> index;
+ } cbuf34;
+
+ union {
+ BitField<20, 16, s64> offset;
+ BitField<36, 5, u64> index;
+ } cbuf36;
+
BitField<61, 1, u64> is_b_imm;
BitField<60, 1, u64> is_b_gpr;
BitField<59, 1, u64> is_c_gpr;
Attribute attribute;
- Uniform uniform;
Sampler sampler;
u64 value;
@@ -356,8 +436,13 @@ class OpCode {
public:
enum class Id {
KIL,
+ SSY,
+ BFE_C,
+ BFE_R,
+ BFE_IMM,
BRA,
LD_A,
+ LD_C,
ST_A,
TEX,
TEXQ, // Texture Query
@@ -376,6 +461,10 @@ public:
FMUL_R,
FMUL_IMM,
FMUL32_IMM,
+ IADD_C,
+ IADD_R,
+ IADD_IMM,
+ IADD32I,
ISCADD_C, // Scale and Add
ISCADD_R,
ISCADD_IMM,
@@ -395,6 +484,9 @@ public:
I2I_C,
I2I_R,
I2I_IMM,
+ LOP_C,
+ LOP_R,
+ LOP_IMM,
LOP32I,
MOV_C,
MOV_R,
@@ -409,6 +501,9 @@ public:
FMNMX_C,
FMNMX_R,
FMNMX_IMM,
+ IMNMX_C,
+ IMNMX_R,
+ IMNMX_IMM,
FSETP_C, // Set Predicate
FSETP_R,
FSETP_IMM,
@@ -418,20 +513,30 @@ public:
ISETP_C,
ISETP_IMM,
ISETP_R,
+ ISET_R,
+ ISET_C,
+ ISET_IMM,
PSETP,
+ XMAD_IMM,
+ XMAD_CR,
+ XMAD_RC,
+ XMAD_RR,
};
enum class Type {
Trivial,
Arithmetic,
- Logic,
+ ArithmeticImmediate,
+ ArithmeticInteger,
+ ArithmeticIntegerImmediate,
+ Bfe,
Shift,
- ScaledAdd,
Ffma,
Flow,
Memory,
FloatSet,
FloatSetPredicate,
+ IntegerSet,
IntegerSetPredicate,
PredicateSetPredicate,
Conversion,
@@ -530,8 +635,10 @@ private:
std::vector<Matcher> table = {
#define INST(bitstring, op, type, name) Detail::GetMatcher(bitstring, op, type, name)
INST("111000110011----", Id::KIL, Type::Flow, "KIL"),
+ INST("111000101001----", Id::SSY, Type::Flow, "SSY"),
INST("111000100100----", Id::BRA, Type::Flow, "BRA"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
+ INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
@@ -549,10 +656,14 @@ private:
INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
- INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
- INST("0100110000011---", Id::ISCADD_C, Type::ScaledAdd, "ISCADD_C"),
- INST("0101110000011---", Id::ISCADD_R, Type::ScaledAdd, "ISCADD_R"),
- INST("0011100-00011---", Id::ISCADD_IMM, Type::ScaledAdd, "ISCADD_IMM"),
+ INST("00011110--------", Id::FMUL32_IMM, Type::ArithmeticImmediate, "FMUL32_IMM"),
+ INST("0100110000010---", Id::IADD_C, Type::ArithmeticInteger, "IADD_C"),
+ INST("0101110000010---", Id::IADD_R, Type::ArithmeticInteger, "IADD_R"),
+ INST("0011100-00010---", Id::IADD_IMM, Type::ArithmeticInteger, "IADD_IMM"),
+ INST("0001110---------", Id::IADD32I, Type::ArithmeticIntegerImmediate, "IADD32I"),
+ INST("0100110000011---", Id::ISCADD_C, Type::ArithmeticInteger, "ISCADD_C"),
+ INST("0101110000011---", Id::ISCADD_R, Type::ArithmeticInteger, "ISCADD_R"),
+ INST("0011100-00011---", Id::ISCADD_IMM, Type::ArithmeticInteger, "ISCADD_IMM"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -560,17 +671,26 @@ private:
INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
- INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
- INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
- INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
+ INST("0100110010110---", Id::F2I_C, Type::Conversion, "F2I_C"),
+ INST("0101110010110---", Id::F2I_R, Type::Conversion, "F2I_R"),
+ INST("0011100-10110---", Id::F2I_IMM, Type::Conversion, "F2I_IMM"),
INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"),
INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"),
INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"),
- INST("000000010000----", Id::MOV32_IMM, Type::Arithmetic, "MOV32_IMM"),
+ INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"),
INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"),
INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"),
INST("0011100-01100---", Id::FMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
- INST("000001----------", Id::LOP32I, Type::Logic, "LOP32I"),
+ INST("0100110000100---", Id::IMNMX_C, Type::Arithmetic, "FMNMX_IMM"),
+ INST("0101110000100---", Id::IMNMX_R, Type::Arithmetic, "FMNMX_IMM"),
+ INST("0011100-00100---", Id::IMNMX_IMM, Type::Arithmetic, "FMNMX_IMM"),
+ INST("0100110000000---", Id::BFE_C, Type::Bfe, "BFE_C"),
+ INST("0101110000000---", Id::BFE_R, Type::Bfe, "BFE_R"),
+ INST("0011100-00000---", Id::BFE_IMM, Type::Bfe, "BFE_IMM"),
+ INST("0100110001000---", Id::LOP_C, Type::ArithmeticInteger, "LOP_C"),
+ INST("0101110001000---", Id::LOP_R, Type::ArithmeticInteger, "LOP_R"),
+ INST("0011100001000---", Id::LOP_IMM, Type::ArithmeticInteger, "LOP_IMM"),
+ INST("000001----------", Id::LOP32I, Type::ArithmeticIntegerImmediate, "LOP32I"),
INST("0100110001001---", Id::SHL_C, Type::Shift, "SHL_C"),
INST("0101110001001---", Id::SHL_R, Type::Shift, "SHL_R"),
INST("0011100-01001---", Id::SHL_IMM, Type::Shift, "SHL_IMM"),
@@ -592,7 +712,14 @@ private:
INST("010010110110----", Id::ISETP_C, Type::IntegerSetPredicate, "ISETP_C"),
INST("010110110110----", Id::ISETP_R, Type::IntegerSetPredicate, "ISETP_R"),
INST("0011011-0110----", Id::ISETP_IMM, Type::IntegerSetPredicate, "ISETP_IMM"),
+ INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
+ INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
+ INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
+ INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
+ INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
+ INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
+ INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
};
#undef INST
std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 756518ee7..e36483145 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -5,6 +5,7 @@
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_compute.h"
+#include "video_core/engines/maxwell_dma.h"
#include "video_core/gpu.h"
namespace Tegra {
@@ -14,6 +15,7 @@ GPU::GPU() {
maxwell_3d = std::make_unique<Engines::Maxwell3D>(*memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(*memory_manager);
maxwell_compute = std::make_unique<Engines::MaxwellCompute>();
+ maxwell_dma = std::make_unique<Engines::MaxwellDMA>(*memory_manager);
}
GPU::~GPU() = default;
@@ -26,6 +28,10 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
ASSERT(format != RenderTargetFormat::NONE);
switch (format) {
+ case RenderTargetFormat::RGBA32_FLOAT:
+ return 16;
+ case RenderTargetFormat::RGBA16_FLOAT:
+ return 8;
case RenderTargetFormat::RGBA8_UNORM:
case RenderTargetFormat::RGB10_A2_UNORM:
return 4;
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index f168a5171..7b4e9b842 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,10 +15,12 @@ namespace Tegra {
enum class RenderTargetFormat : u32 {
NONE = 0x0,
+ RGBA32_FLOAT = 0xC0,
RGBA16_FLOAT = 0xCA,
RGB10_A2_UNORM = 0xD1,
RGBA8_UNORM = 0xD5,
RGBA8_SRGB = 0xD6,
+ R11G11B10_FLOAT = 0xE0,
};
/// Returns the number of bytes per pixel of each rendertarget format.
@@ -61,6 +63,7 @@ namespace Engines {
class Fermi2D;
class Maxwell3D;
class MaxwellCompute;
+class MaxwellDMA;
} // namespace Engines
enum class EngineID {
@@ -101,6 +104,8 @@ private:
std::unique_ptr<Engines::Fermi2D> fermi_2d;
/// Compute engine
std::unique_ptr<Engines::MaxwellCompute> maxwell_compute;
+ /// DMA engine
+ std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
};
} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0a33868b7..3ba20f978 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -196,8 +196,10 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
- // Next available bindpoint to use when uploading the const buffers to the GLSL shaders.
+ // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
+ // shaders.
u32 current_constbuffer_bindpoint = 0;
+ u32 current_texture_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
auto& shader_config = gpu.regs.shader_config[index];
@@ -212,13 +214,17 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
continue;
}
+ GLShader::MaxwellUniformData ubo{};
+ ubo.SetFromRegs(gpu.state.shader_stages[stage]);
+ std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
+
+ // Flush the buffer so that the GPU can see the data we just wrote.
+ glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo));
+
// Upload uniform data as one UBO per stage
const GLintptr ubo_offset = buffer_offset;
copy_buffer(uniform_buffers[stage].handle, ubo_offset,
sizeof(GLShader::MaxwellUniformData));
- GLShader::MaxwellUniformData* ub_ptr =
- reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr);
- ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]);
buffer_ptr += sizeof(GLShader::MaxwellUniformData);
buffer_offset += sizeof(GLShader::MaxwellUniformData);
@@ -258,6 +264,11 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
current_constbuffer_bindpoint =
SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
+
+ // Configure the textures for this shader stage.
+ current_texture_bindpoint =
+ SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
+ current_texture_bindpoint, shader_resources.texture_samplers);
}
shader_program_manager->UseTrivialGeometryShader();
@@ -338,12 +349,12 @@ void RasterizerOpenGL::DrawArrays() {
// Sync the viewport
SyncViewport(surfaces_rect, res_scale);
+ // Sync the blend state registers
+ SyncBlendState();
+
// TODO(bunnei): Sync framebuffer_scale uniform here
// TODO(bunnei): Sync scissorbox uniform(s) here
- // Sync and bind the texture surfaces
- BindTextures();
-
// Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
// scissor test to prevent drawing outside of the framebuffer region
state.scissor.enabled = true;
@@ -447,65 +458,7 @@ void RasterizerOpenGL::DrawArrays() {
}
}
-void RasterizerOpenGL::BindTextures() {
- using Regs = Tegra::Engines::Maxwell3D::Regs;
- auto& maxwell3d = Core::System::GetInstance().GPU().Get3DEngine();
-
- // Each Maxwell shader stage can have an arbitrary number of textures, but we're limited to a
- // certain number in OpenGL. We try to only use the minimum amount of host textures by not
- // keeping a 1:1 relation between guest texture ids and host texture ids, ie, guest texture id 8
- // can be host texture id 0 if it's the only texture used in the guest shader program.
- u32 host_texture_index = 0;
- for (u32 stage = 0; stage < Regs::MaxShaderStage; ++stage) {
- ASSERT(host_texture_index < texture_samplers.size());
- const auto textures = maxwell3d.GetStageTextures(static_cast<Regs::ShaderStage>(stage));
- for (unsigned texture_index = 0; texture_index < textures.size(); ++texture_index) {
- const auto& texture = textures[texture_index];
-
- if (texture.enabled) {
- texture_samplers[host_texture_index].SyncWithConfig(texture.tsc);
- Surface surface = res_cache.GetTextureSurface(texture);
- if (surface != nullptr) {
- state.texture_units[host_texture_index].texture_2d = surface->texture.handle;
- } else {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- state.texture_units[texture_index].texture_2d = 0;
- }
-
- ++host_texture_index;
- } else {
- state.texture_units[texture_index].texture_2d = 0;
- }
- }
- }
-}
-
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
- const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
- switch (method) {
- case MAXWELL3D_REG_INDEX(blend.separate_alpha):
- ASSERT_MSG(false, "unimplemented");
- break;
- case MAXWELL3D_REG_INDEX(blend.equation_rgb):
- state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
- break;
- case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
- state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
- break;
- case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
- state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
- break;
- case MAXWELL3D_REG_INDEX(blend.equation_a):
- state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
- break;
- case MAXWELL3D_REG_INDEX(blend.factor_source_a):
- state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
- break;
- case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
- state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
- break;
- }
-}
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -654,7 +607,16 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
- std::vector<u8> data(used_buffer.GetSize() * sizeof(float));
+
+ std::vector<u8> data;
+ if (used_buffer.IsIndirect()) {
+ // Buffer is accessed indirectly, so upload the entire thing
+ data.resize(buffer.size * sizeof(float));
+ } else {
+ // Buffer is accessed directly, upload just what we use
+ data.resize(used_buffer.GetSize() * sizeof(float));
+ }
+
Memory::ReadBlock(*addr, data.data(), data.size());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
@@ -671,7 +633,53 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
state.Apply();
- return current_bindpoint + entries.size();
+ return current_bindpoint + static_cast<u32>(entries.size());
+}
+
+u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
+ const std::vector<GLShader::SamplerEntry>& entries) {
+ auto& gpu = Core::System::GetInstance().GPU();
+ auto& maxwell3d = gpu.Get3DEngine();
+
+ ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
+ "Attempted to upload textures of disabled shader stage");
+
+ ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
+ "Exceeded the number of active textures.");
+
+ for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ u32 current_bindpoint = current_unit + bindpoint;
+
+ // Bind the uniform to the sampler.
+ GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
+ ASSERT(uniform != -1);
+ glProgramUniform1i(program, uniform, current_bindpoint);
+
+ const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
+ ASSERT(texture.enabled);
+
+ texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+ Surface surface = res_cache.GetTextureSurface(texture);
+ if (surface != nullptr) {
+ state.texture_units[current_bindpoint].texture_2d = surface->texture.handle;
+ state.texture_units[current_bindpoint].swizzle.r =
+ MaxwellToGL::SwizzleSource(texture.tic.x_source);
+ state.texture_units[current_bindpoint].swizzle.g =
+ MaxwellToGL::SwizzleSource(texture.tic.y_source);
+ state.texture_units[current_bindpoint].swizzle.b =
+ MaxwellToGL::SwizzleSource(texture.tic.z_source);
+ state.texture_units[current_bindpoint].swizzle.a =
+ MaxwellToGL::SwizzleSource(texture.tic.w_source);
+ } else {
+ // Can occur when texture addr is null or its memory is unmapped/invalid
+ state.texture_units[current_bindpoint].texture_2d = 0;
+ }
+ }
+
+ state.Apply();
+
+ return current_unit + static_cast<u32>(entries.size());
}
void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
@@ -730,14 +738,21 @@ void RasterizerOpenGL::SyncDepthOffset() {
UNREACHABLE();
}
-void RasterizerOpenGL::SyncBlendEnabled() {
- UNREACHABLE();
-}
+void RasterizerOpenGL::SyncBlendState() {
+ const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-void RasterizerOpenGL::SyncBlendFuncs() {
- UNREACHABLE();
-}
+ // TODO(Subv): Support more than just render target 0.
+ state.blend.enabled = regs.blend.enable[0] != 0;
-void RasterizerOpenGL::SyncBlendColor() {
- UNREACHABLE();
+ if (!state.blend.enabled)
+ return;
+
+ ASSERT_MSG(regs.independent_blend_enable == 1, "Only independent blending is implemented");
+ ASSERT_MSG(!regs.independent_blend[0].separate_alpha, "Unimplemented");
+ state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_rgb);
+ state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_rgb);
+ state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_rgb);
+ state.blend.a_equation = MaxwellToGL::BlendEquation(regs.independent_blend[0].equation_a);
+ state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_source_a);
+ state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.independent_blend[0].factor_dest_a);
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 4b915c76a..b7c8cf843 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,9 +80,6 @@ private:
void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
bool has_stencil);
- /// Binds the required textures to OpenGL before drawing a batch.
- void BindTextures();
-
/*
* Configures the current constbuffers to use for the draw command.
* @param stage The shader stage to configure buffers for.
@@ -95,6 +92,17 @@ private:
u32 current_bindpoint,
const std::vector<GLShader::ConstBufferEntry>& entries);
+ /*
+ * Configures the current textures to use for the draw command.
+ * @param stage The shader stage to configure textures for.
+ * @param program The OpenGL program object that contains the specified stage.
+ * @param current_unit The offset at which to start counting unused texture units.
+ * @param entries Vector describing the textures that are actually used in the guest shader.
+ * @returns The next available bindpoint for use in the next shader stage.
+ */
+ u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
+ u32 current_unit, const std::vector<GLShader::SamplerEntry>& entries);
+
/// Syncs the viewport to match the guest state
void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale);
@@ -113,14 +121,8 @@ private:
/// Syncs the depth offset to match the guest state
void SyncDepthOffset();
- /// Syncs the blend enabled status to match the guest state
- void SyncBlendEnabled();
-
- /// Syncs the blend functions to match the guest state
- void SyncBlendFuncs();
-
- /// Syncs the blend color to match the guest state
- void SyncBlendColor();
+ /// Syncs the blend state to match the guest state
+ void SyncBlendState();
bool has_ARB_buffer_storage;
bool has_ARB_direct_state_access;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index d6048f639..61d670dcb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -28,6 +28,7 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/textures/astc.h"
#include "video_core/textures/decoders.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
@@ -50,18 +51,22 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5
{GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
+ {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
{GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1
+ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
if (type == SurfaceType::ColorTexture) {
ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
- // For now only UNORM components are supported, or RGBA16F which is type FLOAT
- ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F);
+ // For now only UNORM components are supported, or either R11FG11FB10F or RGBA16F which are
+ // type FLOAT
+ ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F ||
+ pixel_format == PixelFormat::R11FG11FB10F);
return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
} else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
// TODO(Subv): Implement depth formats
@@ -83,6 +88,23 @@ static u16 GetResolutionScaleFactor() {
: Settings::values.resolution_factor);
}
+static void ConvertASTCToRGBA8(std::vector<u8>& data, PixelFormat format, u32 width, u32 height) {
+ u32 block_width{};
+ u32 block_height{};
+
+ switch (format) {
+ case PixelFormat::ASTC_2D_4X4:
+ block_width = 4;
+ block_height = 4;
+ break;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unhandled format: {}", static_cast<u32>(format));
+ UNREACHABLE();
+ }
+
+ data = Tegra::Texture::ASTC::Decompress(data, width, height, block_width, block_height);
+}
+
template <bool morton_to_gl, PixelFormat format>
void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base,
Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
@@ -94,6 +116,12 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::
auto data = Tegra::Texture::UnswizzleTexture(
*gpu.memory_manager->GpuToCpuAddress(base),
SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
+
+ if (SurfaceParams::IsFormatASTC(format)) {
+ // ASTC formats are converted to RGBA8 in software, as most PC GPUs do not support this
+ ConvertASTCToRGBA8(data, format, stride, height);
+ }
+
std::memcpy(gl_buffer, data.data(), data.size());
} else {
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
@@ -110,11 +138,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
Tegra::GPUVAddr),
SurfaceParams::MaxPixelFormat>
morton_to_gl_fns = {
- MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
- MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
- MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
- MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
- MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
+ MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
+ MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
+ MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>,
+ MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::DXT1>,
+ MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
+ MortonCopy<true, PixelFormat::DXN1>, MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -127,11 +156,13 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
MortonCopy<false, PixelFormat::A1B5G5R5>,
MortonCopy<false, PixelFormat::R8>,
MortonCopy<false, PixelFormat::RGBA16F>,
+ MortonCopy<false, PixelFormat::R11FG11FB10F>,
// TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
nullptr,
nullptr,
nullptr,
nullptr,
+ MortonCopy<false, PixelFormat::ABGR8>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
@@ -164,60 +195,10 @@ static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tup
static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
GLuint read_fb_handle, GLuint draw_fb_handle) {
- OpenGLState state = OpenGLState::GetCurState();
-
- OpenGLState prev_state = state;
- SCOPE_EXIT({ prev_state.Apply(); });
-
- // Make sure textures aren't bound to texture units, since going to bind them to framebuffer
- // components
- state.ResetTexture(src_tex);
- state.ResetTexture(dst_tex);
-
- state.draw.read_framebuffer = read_fb_handle;
- state.draw.draw_framebuffer = draw_fb_handle;
- state.Apply();
-
- u32 buffers = 0;
-
- if (type == SurfaceType::ColorTexture) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
- 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dst_tex,
- 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
- 0);
-
- buffers = GL_COLOR_BUFFER_BIT;
- } else if (type == SurfaceType::Depth) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, src_tex, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, dst_tex, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-
- buffers = GL_DEPTH_BUFFER_BIT;
- } else if (type == SurfaceType::DepthStencil) {
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- src_tex, 0);
-
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
- dst_tex, 0);
-
- buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
- }
-
- glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
- dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
- buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);
+ glCopyImageSubData(src_tex, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, dst_tex,
+ GL_TEXTURE_2D, 0, dst_rect.left, dst_rect.bottom, 0, src_rect.GetWidth(),
+ src_rect.GetHeight(), 0);
return true;
}
@@ -594,7 +575,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format,
static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()),
static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0,
- size, &gl_buffer[buffer_offset]);
+ static_cast<GLsizei>(size), &gl_buffer[buffer_offset]);
} else {
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
@@ -933,9 +914,6 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
// Use GetSurfaceSubRect instead
ASSERT(params.width == params.stride);
- ASSERT(!params.is_tiled ||
- (params.GetActualWidth() % 8 == 0 && params.GetActualHeight() % 8 == 0));
-
// Check for an exact match in existing surfaces
Surface surface =
FindMatch<MatchFlags::Exact | MatchFlags::Invalid>(surface_cache, params, match_res_scale);
@@ -1078,8 +1056,11 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
params.addr = config.tic.Address();
params.is_tiled = config.tic.IsTiled();
params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
- params.width = config.tic.Width() / params.GetCompresssionFactor();
- params.height = config.tic.Height() / params.GetCompresssionFactor();
+
+ params.width = Common::AlignUp(config.tic.Width(), params.GetCompresssionFactor()) /
+ params.GetCompresssionFactor();
+ params.height = Common::AlignUp(config.tic.Height(), params.GetCompresssionFactor()) /
+ params.GetCompresssionFactor();
// TODO(Subv): Different types per component are not supported.
ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
@@ -1090,6 +1071,13 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
if (config.tic.IsTiled()) {
params.block_height = config.tic.BlockHeight();
+
+ // TODO(bunnei): The below align up is a hack. This is here because some compressed textures
+ // are not a multiple of their own compression factor, and so this accounts for that. This
+ // could potentially result in an extra row of 4px being decoded if a texture is not a
+ // multiple of 4.
+ params.width = Common::AlignUp(params.width, 4);
+ params.height = Common::AlignUp(params.height, 4);
} else {
// Use the texture-provided stride value if the texture isn't tiled.
params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
@@ -1097,23 +1085,6 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
params.UpdateParams();
- if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
- params.stride != params.width) {
- Surface src_surface;
- MathUtil::Rectangle<u32> rect;
- std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
-
- params.res_scale = src_surface->res_scale;
- Surface tmp_surface = CreateSurface(params);
- BlitTextures(src_surface->texture.handle, rect, tmp_surface->texture.handle,
- tmp_surface->GetScaledRect(),
- SurfaceParams::GetFormatType(params.pixel_format), read_framebuffer.handle,
- draw_framebuffer.handle);
-
- remove_surfaces.emplace(tmp_surface);
- return tmp_surface;
- }
-
return GetSurface(params, ScaleMatch::Ignore, true);
}
@@ -1288,7 +1259,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVA
const auto interval = *it & validate_interval;
// Look for a valid surface to copy from
- SurfaceParams params = surface->FromInterval(interval);
+ SurfaceParams params = *surface;
Surface copy_surface =
FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 6f08678ab..9da945e19 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -60,10 +60,12 @@ struct SurfaceParams {
A1B5G5R5 = 3,
R8 = 4,
RGBA16F = 5,
- DXT1 = 6,
- DXT23 = 7,
- DXT45 = 8,
- DXN1 = 9, // This is also known as BC4
+ R11FG11FB10F = 6,
+ DXT1 = 7,
+ DXT23 = 8,
+ DXT45 = 9,
+ DXN1 = 10, // This is also known as BC4
+ ASTC_2D_4X4 = 11,
Max,
Invalid = 255,
@@ -104,11 +106,13 @@ struct SurfaceParams {
1, // A2B10G10R10
1, // A1B5G5R5
1, // R8
- 2, // RGBA16F
+ 1, // RGBA16F
+ 1, // R11FG11FB10F
4, // DXT1
4, // DXT23
4, // DXT45
4, // DXN1
+ 1, // ASTC_2D_4X4
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -129,10 +133,12 @@ struct SurfaceParams {
16, // A1B5G5R5
8, // R8
64, // RGBA16F
+ 32, // R11FG11FB10F
64, // DXT1
128, // DXT23
128, // DXT45
64, // DXN1
+ 32, // ASTC_2D_4X4
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -151,12 +157,23 @@ struct SurfaceParams {
return PixelFormat::A2B10G10R10;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
return PixelFormat::RGBA16F;
+ case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
+ return PixelFormat::R11FG11FB10F;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
}
}
+ static bool IsFormatASTC(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::ASTC_2D_4X4:
+ return true;
+ default:
+ return false;
+ }
+ }
+
static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
@@ -182,6 +199,8 @@ struct SurfaceParams {
return PixelFormat::R8;
case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
return PixelFormat::RGBA16F;
+ case Tegra::Texture::TextureFormat::BF10GF11RF11:
+ return PixelFormat::R11FG11FB10F;
case Tegra::Texture::TextureFormat::DXT1:
return PixelFormat::DXT1;
case Tegra::Texture::TextureFormat::DXT23:
@@ -190,6 +209,8 @@ struct SurfaceParams {
return PixelFormat::DXT45;
case Tegra::Texture::TextureFormat::DXN1:
return PixelFormat::DXN1;
+ case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
+ return PixelFormat::ASTC_2D_4X4;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -211,6 +232,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::R8;
case PixelFormat::RGBA16F:
return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
+ case PixelFormat::R11FG11FB10F:
+ return Tegra::Texture::TextureFormat::BF10GF11RF11;
case PixelFormat::DXT1:
return Tegra::Texture::TextureFormat::DXT1;
case PixelFormat::DXT23:
@@ -219,6 +242,8 @@ struct SurfaceParams {
return Tegra::Texture::TextureFormat::DXT45;
case PixelFormat::DXN1:
return Tegra::Texture::TextureFormat::DXN1;
+ case PixelFormat::ASTC_2D_4X4:
+ return Tegra::Texture::TextureFormat::ASTC_2D_4X4;
default:
UNREACHABLE();
}
@@ -243,6 +268,7 @@ struct SurfaceParams {
case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
return ComponentType::UNorm;
case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+ case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
return ComponentType::Float;
default:
NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f886e49ca..65fed77ef 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -16,11 +16,11 @@ namespace Decompiler {
using Tegra::Shader::Attribute;
using Tegra::Shader::Instruction;
+using Tegra::Shader::LogicOperation;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
using Tegra::Shader::Sampler;
using Tegra::Shader::SubOp;
-using Tegra::Shader::Uniform;
constexpr u32 PROGRAM_END = MAX_PROGRAM_CODE_LENGTH;
@@ -267,6 +267,27 @@ public:
}
/**
+ * Returns code that does an integer size conversion for the specified size.
+ * @param value Value to perform integer size conversion on.
+ * @param size Register size to use for conversion instructions.
+ * @returns GLSL string corresponding to the value converted to the specified size.
+ */
+ static std::string ConvertIntegerSize(const std::string& value, Register::Size size) {
+ switch (size) {
+ case Register::Size::Byte:
+ return "((" + value + " << 24) >> 24)";
+ case Register::Size::Short:
+ return "((" + value + " << 16) >> 16)";
+ case Register::Size::Word:
+ // Default - do nothing
+ return value;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented conversion size {}", static_cast<u32>(size));
+ UNREACHABLE();
+ }
+ }
+
+ /**
* Gets a register as an float.
* @param reg The register to get.
* @param elem The element to use for the operation.
@@ -282,15 +303,18 @@ public:
* @param reg The register to get.
* @param elem The element to use for the operation.
* @param is_signed Whether to get the register as a signed (or unsigned) integer.
+ * @param size Register size to use for conversion instructions.
* @returns GLSL string corresponding to the register as an integer.
*/
- std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0,
- bool is_signed = true) {
+ std::string GetRegisterAsInteger(const Register& reg, unsigned elem = 0, bool is_signed = true,
+ Register::Size size = Register::Size::Word) {
const std::string func = GetGLSLConversionFunc(
GLSLRegister::Type::Float,
is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger);
- return func + '(' + GetRegister(reg, elem) + ')';
+ std::string value = func + '(' + GetRegister(reg, elem) + ')';
+
+ return ConvertIntegerSize(value, size);
}
/**
@@ -300,13 +324,15 @@ public:
* @param value The code representing the value to assign.
* @param dest_num_components Number of components in the destination.
* @param value_num_components Number of components in the value.
- * @param is_abs Optional, when True, applies absolute value to output.
+ * @param is_saturated Optional, when True, saturates the provided value.
* @param dest_elem Optional, the destination element to use for the operation.
*/
void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
- u64 dest_num_components, u64 value_num_components, bool is_abs = false,
- u64 dest_elem = 0) {
- SetRegister(reg, elem, value, dest_num_components, value_num_components, is_abs, dest_elem);
+ u64 dest_num_components, u64 value_num_components,
+ bool is_saturated = false, u64 dest_elem = 0) {
+
+ SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
+ dest_num_components, value_num_components, dest_elem);
}
/**
@@ -316,18 +342,22 @@ public:
* @param value The code representing the value to assign.
* @param dest_num_components Number of components in the destination.
* @param value_num_components Number of components in the value.
- * @param is_abs Optional, when True, applies absolute value to output.
+ * @param is_saturated Optional, when True, saturates the provided value.
* @param dest_elem Optional, the destination element to use for the operation.
+ * @param size Register size to use for conversion instructions.
*/
void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
const std::string& value, u64 dest_num_components,
- u64 value_num_components, bool is_abs = false, u64 dest_elem = 0) {
+ u64 value_num_components, bool is_saturated = false,
+ u64 dest_elem = 0, Register::Size size = Register::Size::Word) {
+ ASSERT_MSG(!is_saturated, "Unimplemented");
+
const std::string func = GetGLSLConversionFunc(
is_signed ? GLSLRegister::Type::Integer : GLSLRegister::Type::UnsignedInteger,
GLSLRegister::Type::Float);
- SetRegister(reg, elem, func + '(' + value + ')', dest_num_components, value_num_components,
- is_abs, dest_elem);
+ SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
+ dest_num_components, value_num_components, dest_elem);
}
/**
@@ -365,11 +395,9 @@ public:
}
/// Generates code representing a uniform (C buffer) register, interpreted as the input type.
- std::string GetUniform(const Uniform& uniform, GLSLRegister::Type type) {
- declr_const_buffers[uniform.index].MarkAsUsed(static_cast<unsigned>(uniform.index),
- static_cast<unsigned>(uniform.offset), stage);
- std::string value =
- 'c' + std::to_string(uniform.index) + '[' + std::to_string(uniform.offset) + ']';
+ std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
+ declr_const_buffers[index].MarkAsUsed(index, offset, stage);
+ std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']';
if (type == GLSLRegister::Type::Float) {
return value;
@@ -380,10 +408,19 @@ public:
}
}
- /// Generates code representing a uniform (C buffer) register, interpreted as the type of the
- /// destination register.
- std::string GetUniform(const Uniform& uniform, const Register& dest_reg) {
- return GetUniform(uniform, regs[dest_reg].GetActiveType());
+ std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
+ GLSLRegister::Type type) {
+ declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
+ std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
+ GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]";
+
+ if (type == GLSLRegister::Type::Float) {
+ return value;
+ } else if (type == GLSLRegister::Type::Integer) {
+ return "floatBitsToInt(" + value + ')';
+ } else {
+ UNREACHABLE();
+ }
}
/// Add declarations for registers
@@ -425,6 +462,14 @@ public:
++const_buffer_layout;
}
declarations.AddNewLine();
+
+ // Append the sampler2D array for the used textures.
+ size_t num_samplers = GetSamplers().size();
+ if (num_samplers > 0) {
+ declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
+ std::to_string(num_samplers) + "];");
+ declarations.AddNewLine();
+ }
}
/// Returns a list of constant buffer declarations
@@ -435,6 +480,32 @@ public:
return result;
}
+ /// Returns a list of samplers used in the shader
+ std::vector<SamplerEntry> GetSamplers() const {
+ return used_samplers;
+ }
+
+ /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
+ /// necessary.
+ std::string AccessSampler(const Sampler& sampler) {
+ size_t offset = static_cast<size_t>(sampler.index.Value());
+
+ // If this sampler has already been used, return the existing mapping.
+ auto itr =
+ std::find_if(used_samplers.begin(), used_samplers.end(),
+ [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
+
+ if (itr != used_samplers.end()) {
+ return itr->GetName();
+ }
+
+ // Otherwise create a new mapping for this sampler
+ size_t next_index = used_samplers.size();
+ SamplerEntry entry{stage, offset, next_index};
+ used_samplers.emplace_back(entry);
+ return entry.GetName();
+ }
+
private:
/// Build GLSL conversion function, e.g. floatBitsToInt, intBitsToFloat, etc.
const std::string GetGLSLConversionFunc(GLSLRegister::Type src, GLSLRegister::Type dest) const {
@@ -460,13 +531,11 @@ private:
* @param value The code representing the value to assign.
* @param dest_num_components Number of components in the destination.
* @param value_num_components Number of components in the value.
- * @param is_abs Optional, when True, applies absolute value to output.
* @param dest_elem Optional, the destination element to use for the operation.
*/
void SetRegister(const Register& reg, u64 elem, const std::string& value,
- u64 dest_num_components, u64 value_num_components, bool is_abs,
- u64 dest_elem) {
- std::string dest = GetRegister(reg, dest_elem);
+ u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
+ std::string dest = GetRegister(reg, static_cast<u32>(dest_elem));
if (dest_num_components > 1) {
dest += GetSwizzle(elem);
}
@@ -476,8 +545,6 @@ private:
src += GetSwizzle(elem);
}
- src = is_abs ? "abs(" + src + ')' : src;
-
shader.AddLine(dest + " = " + src + ';');
}
@@ -498,7 +565,7 @@ private:
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
- return "vec4(0, 0, gl_InstanceID, gl_VertexID)";
+ return "vec4(0, 0, uintBitsToFloat(gl_InstanceID), uintBitsToFloat(gl_VertexID))";
default:
const u32 index{static_cast<u32>(attribute) -
static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -544,6 +611,7 @@ private:
std::set<Attribute::Index> declr_input_attribute;
std::set<Attribute::Index> declr_output_attribute;
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
+ std::vector<SamplerEntry> used_samplers;
const Maxwell3D::Regs::ShaderStage& stage;
};
@@ -563,7 +631,7 @@ public:
/// Returns entries in the shader that are useful for external functions
ShaderEntries GetEntries() const {
- return {regs.GetConstBuffersDeclarations()};
+ return {regs.GetConstBuffersDeclarations(), regs.GetSamplers()};
}
private:
@@ -585,12 +653,8 @@ private:
}
/// Generates code representing a texture sampler.
- std::string GetSampler(const Sampler& sampler) const {
- // TODO(Subv): Support more than just texture sampler 0
- ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
- const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
- static_cast<unsigned>(Sampler::Index::Sampler_0)};
- return "tex[" + std::to_string(index) + ']';
+ std::string GetSampler(const Sampler& sampler) {
+ return regs.AccessSampler(sampler);
}
/**
@@ -696,6 +760,31 @@ private:
return (absolute_offset % SchedPeriod) == 0;
}
+ void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
+ const std::string& op_b) {
+ switch (logic_op) {
+ case LogicOperation::And: {
+ regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " & " + op_b + ')', 1, 1);
+ break;
+ }
+ case LogicOperation::Or: {
+ regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " | " + op_b + ')', 1, 1);
+ break;
+ }
+ case LogicOperation::Xor: {
+ regs.SetRegisterToInteger(dest, true, 0, '(' + op_a + " ^ " + op_b + ')', 1, 1);
+ break;
+ }
+ case LogicOperation::PassB: {
+ regs.SetRegisterToInteger(dest, true, 0, op_b, 1, 1);
+ break;
+ }
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented logic operation: {}", static_cast<u32>(logic_op));
+ UNREACHABLE();
+ }
+ }
+
/**
* Compiles a single instruction from Tegra to GLSL.
* @param offset the offset of the Tegra shader instruction.
@@ -733,21 +822,25 @@ private:
switch (opcode->GetType()) {
case OpCode::Type::Arithmetic: {
- std::string op_a = instr.alu.negate_a ? "-" : "";
- op_a += regs.GetRegisterAsFloat(instr.gpr8);
+ std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
if (instr.alu.abs_a) {
op_a = "abs(" + op_a + ')';
}
- std::string op_b = instr.alu.negate_b ? "-" : "";
+ if (instr.alu.negate_a) {
+ op_a = "-(" + op_a + ')';
+ }
+
+ std::string op_b;
if (instr.is_b_imm) {
- op_b += GetImmediate19(instr);
+ op_b = GetImmediate19(instr);
} else {
if (instr.is_b_gpr) {
- op_b += regs.GetRegisterAsFloat(instr.gpr20);
+ op_b = regs.GetRegisterAsFloat(instr.gpr20);
} else {
- op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+ op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
}
}
@@ -755,6 +848,10 @@ private:
op_b = "abs(" + op_b + ')';
}
+ if (instr.alu.negate_b) {
+ op_b = "-(" + op_b + ')';
+ }
+
switch (opcode->GetId()) {
case OpCode::Id::MOV_C:
case OpCode::Id::MOV_R: {
@@ -762,58 +859,49 @@ private:
break;
}
- case OpCode::Id::MOV32_IMM: {
- // mov32i doesn't have abs or neg bits.
- regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
- break;
- }
case OpCode::Id::FMUL_C:
case OpCode::Id::FMUL_R:
case OpCode::Id::FMUL_IMM: {
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
- break;
- }
- case OpCode::Id::FMUL32_IMM: {
- // fmul32i doesn't have abs or neg bits.
- regs.SetRegisterToFloat(
- instr.gpr0, 0,
- regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Id::FADD_C:
case OpCode::Id::FADD_R:
case OpCode::Id::FADD_IMM: {
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Id::MUFU: {
switch (instr.sub_op) {
case SubOp::Cos:
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Sin:
regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Ex2:
regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Lg2:
regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Rcp:
- regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+ regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
+ instr.alu.saturate_d);
break;
case SubOp::Rsq:
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
case SubOp::Min:
regs.SetRegisterToFloat(instr.gpr0, 0, "min(" + op_a + "," + op_b + ')', 1, 1,
- instr.alu.abs_d);
+ instr.alu.saturate_d);
break;
default:
NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
@@ -850,52 +938,49 @@ private:
}
break;
}
- case OpCode::Type::Logic: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
-
- if (instr.alu.lop.invert_a)
- op_a = "~(" + op_a + ')';
-
+ case OpCode::Type::ArithmeticImmediate: {
switch (opcode->GetId()) {
- case OpCode::Id::LOP32I: {
- u32 imm = static_cast<u32>(instr.alu.imm20_32.Value());
+ case OpCode::Id::MOV32_IMM: {
+ regs.SetRegisterToFloat(instr.gpr0, 0, GetImmediate32(instr), 1, 1);
+ break;
+ }
+ case OpCode::Id::FMUL32_IMM: {
+ regs.SetRegisterToFloat(
+ instr.gpr0, 0,
+ regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
+ break;
+ }
+ }
+ break;
+ }
+ case OpCode::Type::Bfe: {
+ ASSERT_MSG(!instr.bfe.negate_b, "Unimplemented");
- if (instr.alu.lop.invert_b)
- imm = ~imm;
+ std::string op_a = instr.bfe.negate_a ? "-" : "";
+ op_a += regs.GetRegisterAsInteger(instr.gpr8);
- switch (instr.alu.lop.operation) {
- case Tegra::Shader::LogicOperation::And: {
- regs.SetRegisterToInteger(instr.gpr0, false, 0,
- '(' + op_a + " & " + std::to_string(imm) + ')', 1, 1);
- break;
- }
- case Tegra::Shader::LogicOperation::Or: {
- regs.SetRegisterToInteger(instr.gpr0, false, 0,
- '(' + op_a + " | " + std::to_string(imm) + ')', 1, 1);
- break;
- }
- case Tegra::Shader::LogicOperation::Xor: {
- regs.SetRegisterToInteger(instr.gpr0, false, 0,
- '(' + op_a + " ^ " + std::to_string(imm) + ')', 1, 1);
- break;
- }
- default:
- NGLOG_CRITICAL(HW_GPU, "Unimplemented lop32i operation: {}",
- static_cast<u32>(instr.alu.lop.operation.Value()));
- UNREACHABLE();
- }
+ switch (opcode->GetId()) {
+ case OpCode::Id::BFE_IMM: {
+ std::string inner_shift =
+ '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
+ std::string outer_shift =
+ '(' + inner_shift + " >> " +
+ std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
break;
}
default: {
- NGLOG_CRITICAL(HW_GPU, "Unhandled logic instruction: {}", opcode->GetName());
+ NGLOG_CRITICAL(HW_GPU, "Unhandled BFE instruction: {}", opcode->GetName());
UNREACHABLE();
}
}
+
break;
}
case OpCode::Type::Shift: {
- std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
+ std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, true);
std::string op_b;
if (instr.is_b_imm) {
@@ -904,11 +989,25 @@ private:
if (instr.is_b_gpr) {
op_b += regs.GetRegisterAsInteger(instr.gpr20);
} else {
- op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Integer);
}
}
switch (opcode->GetId()) {
+ case OpCode::Id::SHR_C:
+ case OpCode::Id::SHR_R:
+ case OpCode::Id::SHR_IMM: {
+ if (!instr.shift.is_signed) {
+ // Logical shift right
+ op_a = "uint(" + op_a + ')';
+ }
+
+ // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
+ 1, 1);
+ break;
+ }
case OpCode::Id::SHL_C:
case OpCode::Id::SHL_R:
case OpCode::Id::SHL_IMM:
@@ -922,28 +1021,101 @@ private:
break;
}
- case OpCode::Type::ScaledAdd: {
+ case OpCode::Type::ArithmeticIntegerImmediate: {
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ std::string op_b = std::to_string(instr.alu.imm20_32.Value());
- if (instr.iscadd.negate_a)
- op_a = '-' + op_a;
+ switch (opcode->GetId()) {
+ case OpCode::Id::IADD32I:
+ if (instr.iadd32i.negate_a)
+ op_a = "-(" + op_a + ')';
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
+ instr.iadd32i.saturate != 0);
+ break;
+ case OpCode::Id::LOP32I: {
+ if (instr.alu.lop32i.invert_a)
+ op_a = "~(" + op_a + ')';
- std::string op_b = instr.iscadd.negate_b ? "-" : "";
+ if (instr.alu.lop32i.invert_b)
+ op_b = "~(" + op_b + ')';
+ WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b);
+ break;
+ }
+ default: {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticIntegerImmediate instruction: {}",
+ opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+ break;
+ }
+ case OpCode::Type::ArithmeticInteger: {
+ std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
+ std::string op_b;
if (instr.is_b_imm) {
op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
} else {
if (instr.is_b_gpr) {
op_b += regs.GetRegisterAsInteger(instr.gpr20);
} else {
- op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Integer);
}
}
- std::string shift = std::to_string(instr.iscadd.shift_amount.Value());
+ switch (opcode->GetId()) {
+ case OpCode::Id::IADD_C:
+ case OpCode::Id::IADD_R:
+ case OpCode::Id::IADD_IMM: {
+ if (instr.alu_integer.negate_a)
+ op_a = "-(" + op_a + ')';
+
+ if (instr.alu_integer.negate_b)
+ op_b = "-(" + op_b + ')';
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
+ instr.alu.saturate_d);
+ break;
+ }
+ case OpCode::Id::ISCADD_C:
+ case OpCode::Id::ISCADD_R:
+ case OpCode::Id::ISCADD_IMM: {
+ if (instr.alu_integer.negate_a)
+ op_a = "-(" + op_a + ')';
+
+ if (instr.alu_integer.negate_b)
+ op_b = "-(" + op_b + ')';
+
+ std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+
+ regs.SetRegisterToInteger(instr.gpr0, true, 0,
+ "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
+ break;
+ }
+ case OpCode::Id::LOP_C:
+ case OpCode::Id::LOP_R:
+ case OpCode::Id::LOP_IMM: {
+ ASSERT_MSG(!instr.alu.lop.unk44, "Unimplemented");
+ ASSERT_MSG(instr.alu.lop.pred48 == Pred::UnusedIndex, "Unimplemented");
+
+ if (instr.alu.lop.invert_a)
+ op_a = "~(" + op_a + ')';
+
+ if (instr.alu.lop.invert_b)
+ op_b = "~(" + op_b + ')';
+
+ WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b);
+ break;
+ }
+ default: {
+ NGLOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
+ opcode->GetName());
+ UNREACHABLE();
+ }
+ }
- regs.SetRegisterToInteger(instr.gpr0, true, 0,
- "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
break;
}
case OpCode::Type::Ffma: {
@@ -953,7 +1125,8 @@ private:
switch (opcode->GetId()) {
case OpCode::Id::FFMA_CR: {
- op_b += regs.GetUniform(instr.uniform, instr.gpr0);
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
op_c += regs.GetRegisterAsFloat(instr.gpr39);
break;
}
@@ -964,7 +1137,8 @@ private:
}
case OpCode::Id::FFMA_RC: {
op_b += regs.GetRegisterAsFloat(instr.gpr39);
- op_c += regs.GetUniform(instr.uniform, instr.gpr0);
+ op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
break;
}
case OpCode::Id::FFMA_IMM: {
@@ -978,31 +1152,33 @@ private:
}
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + " + " + op_c, 1, 1,
+ instr.alu.saturate_d);
break;
}
case OpCode::Type::Conversion: {
- ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
- ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
switch (opcode->GetId()) {
case OpCode::Id::I2I_R: {
ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
- std::string op_a =
- regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
+ std::string op_a = regs.GetRegisterAsInteger(
+ instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
- regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
+ regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
+ 1, instr.alu.saturate_d, 0, instr.conversion.dest_size);
break;
}
case OpCode::Id::I2F_R: {
- std::string op_a =
- regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
+ ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
+ ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
+ std::string op_a = regs.GetRegisterAsInteger(
+ instr.gpr20, 0, instr.conversion.is_input_signed, instr.conversion.src_size);
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
@@ -1012,13 +1188,71 @@ private:
break;
}
case OpCode::Id::F2F_R: {
+ ASSERT_MSG(instr.conversion.dest_size == Register::Size::Word, "Unimplemented");
+ ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+ switch (instr.conversion.f2f.rounding) {
+ case Tegra::Shader::F2fRoundingOp::None:
+ break;
+ case Tegra::Shader::F2fRoundingOp::Floor:
+ op_a = "floor(" + op_a + ')';
+ break;
+ case Tegra::Shader::F2fRoundingOp::Ceil:
+ op_a = "ceil(" + op_a + ')';
+ break;
+ case Tegra::Shader::F2fRoundingOp::Trunc:
+ op_a = "trunc(" + op_a + ')';
+ break;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented f2f rounding mode {}",
+ static_cast<u32>(instr.conversion.f2f.rounding.Value()));
+ UNREACHABLE();
+ break;
+ }
+
if (instr.conversion.abs_a) {
op_a = "abs(" + op_a + ')';
}
- regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
+ break;
+ }
+ case OpCode::Id::F2I_R: {
+ ASSERT_MSG(instr.conversion.src_size == Register::Size::Word, "Unimplemented");
+ std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+
+ if (instr.conversion.abs_a) {
+ op_a = "abs(" + op_a + ')';
+ }
+
+ switch (instr.conversion.f2i.rounding) {
+ case Tegra::Shader::F2iRoundingOp::None:
+ break;
+ case Tegra::Shader::F2iRoundingOp::Floor:
+ op_a = "floor(" + op_a + ')';
+ break;
+ case Tegra::Shader::F2iRoundingOp::Ceil:
+ op_a = "ceil(" + op_a + ')';
+ break;
+ case Tegra::Shader::F2iRoundingOp::Trunc:
+ op_a = "trunc(" + op_a + ')';
+ break;
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unimplemented f2i rounding mode {}",
+ static_cast<u32>(instr.conversion.f2i.rounding.Value()));
+ UNREACHABLE();
+ break;
+ }
+
+ if (instr.conversion.is_output_signed) {
+ op_a = "int(" + op_a + ')';
+ } else {
+ op_a = "uint(" + op_a + ')';
+ }
+
+ regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
+ 1, false, 0, instr.conversion.dest_size);
break;
}
default: {
@@ -1029,36 +1263,60 @@ private:
break;
}
case OpCode::Type::Memory: {
- const Attribute::Index attribute = instr.attribute.fmt20.index;
-
switch (opcode->GetId()) {
case OpCode::Id::LD_A: {
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
regs.SetRegisterToInputAttibute(instr.gpr0, instr.attribute.fmt20.element,
- attribute);
+ instr.attribute.fmt20.index);
+ break;
+ }
+ case OpCode::Id::LD_C: {
+ ASSERT_MSG(instr.ld_c.unknown == 0, "Unimplemented");
+
+ std::string op_a =
+ regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, instr.gpr8,
+ GLSLRegister::Type::Float);
+ std::string op_b =
+ regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, instr.gpr8,
+ GLSLRegister::Type::Float);
+
+ switch (instr.ld_c.type.Value()) {
+ case Tegra::Shader::UniformType::Single:
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+ break;
+
+ case Tegra::Shader::UniformType::Double:
+ regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+ regs.SetRegisterToFloat(instr.gpr0.Value() + 1, 0, op_b, 1, 1);
+ break;
+
+ default:
+ NGLOG_CRITICAL(HW_GPU, "Unhandled type: {}",
+ static_cast<unsigned>(instr.ld_c.type.Value()));
+ UNREACHABLE();
+ }
break;
}
case OpCode::Id::ST_A: {
ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
- regs.SetOutputAttributeToRegister(attribute, instr.attribute.fmt20.element,
- instr.gpr0);
+ regs.SetOutputAttributeToRegister(instr.attribute.fmt20.index,
+ instr.attribute.fmt20.element, instr.gpr0);
break;
}
case OpCode::Id::TEX: {
- ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
const std::string sampler = GetSampler(instr.sampler);
const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
- // Add an extra scope and declare the texture coords inside to prevent overwriting
- // them in case they are used as outputs of the texs instruction.
+ // Add an extra scope and declare the texture coords inside to prevent
+ // overwriting them in case they are used as outputs of the texs instruction.
shader.AddLine("{");
++shader.scope;
shader.AddLine(coord);
const std::string texture = "texture(" + sampler + ", coords)";
size_t dest_elem{};
- for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+ for (size_t elem = 0; elem < 4; ++elem) {
if (!instr.tex.IsComponentEnabled(elem)) {
// Skip disabled components
continue;
@@ -1071,7 +1329,6 @@ private:
break;
}
case OpCode::Id::TEXS: {
- ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
const std::string sampler = GetSampler(instr.sampler);
@@ -1083,8 +1340,8 @@ private:
shader.AddLine(coord);
const std::string texture = "texture(" + sampler + ", coords)";
- // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes
- // into gpr28+0 and gpr28+1
+ // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA
+ // goes into gpr28+0 and gpr28+1
size_t offset{};
for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) {
@@ -1134,7 +1391,8 @@ private:
if (instr.is_b_gpr) {
op_b += regs.GetRegisterAsFloat(instr.gpr20);
} else {
- op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float);
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
}
}
@@ -1167,15 +1425,17 @@ private:
}
case OpCode::Type::IntegerSetPredicate: {
std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+ std::string op_b;
- std::string op_b{};
-
- ASSERT_MSG(!instr.is_b_imm, "ISETP_IMM not implemented");
-
- if (instr.is_b_gpr) {
- op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
+ if (instr.is_b_imm) {
+ op_b += '(' + std::to_string(instr.alu.GetSignedImm20_20()) + ')';
} else {
- op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Integer);
+ if (instr.is_b_gpr) {
+ op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.isetp.is_signed);
+ } else {
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Integer);
+ }
}
using Tegra::Shader::Pred;
@@ -1221,7 +1481,8 @@ private:
if (instr.is_b_gpr) {
op_b += regs.GetRegisterAsFloat(instr.gpr20);
} else {
- op_b += regs.GetUniform(instr.uniform, GLSLRegister::Type::Float);
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Float);
}
}
@@ -1229,8 +1490,8 @@ private:
op_b = "abs(" + op_b + ')';
}
- // The fset instruction sets a register to 1.0 if the condition is true, and to 0
- // otherwise.
+ // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
+ // condition is true, and to 0 otherwise.
std::string second_pred =
GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
@@ -1248,6 +1509,41 @@ private:
}
break;
}
+ case OpCode::Type::IntegerSet: {
+ std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+
+ std::string op_b;
+
+ if (instr.is_b_imm) {
+ op_b = std::to_string(instr.alu.GetSignedImm20_20());
+ } else {
+ if (instr.is_b_gpr) {
+ op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, instr.iset.is_signed);
+ } else {
+ op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ GLSLRegister::Type::Integer);
+ }
+ }
+
+ // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
+ // condition is true, and to 0 otherwise.
+ std::string second_pred =
+ GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
+
+ std::string comparator = GetPredicateComparison(instr.iset.cond);
+ std::string combiner = GetPredicateCombiner(instr.iset.op);
+
+ std::string predicate = "(((" + op_a + ") " + comparator + " (" + op_b + ")) " +
+ combiner + " (" + second_pred + "))";
+
+ if (instr.iset.bf) {
+ regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
+ } else {
+ regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
+ 1);
+ }
+ break;
+ }
default: {
switch (opcode->GetId()) {
case OpCode::Id::EXIT: {
@@ -1261,8 +1557,8 @@ private:
shader.AddLine("return true;");
if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
- // If this is an unconditional exit then just end processing here, otherwise we
- // have to account for the possibility of the condition not being met, so
+ // If this is an unconditional exit then just end processing here, otherwise
+ // we have to account for the possibility of the condition not being met, so
// continue processing the next instruction.
offset = PROGRAM_END - 1;
}
@@ -1284,6 +1580,11 @@ private:
regs.SetRegisterToInputAttibute(instr.gpr0, attribute.element, attribute.index);
break;
}
+ case OpCode::Id::SSY: {
+ // The SSY opcode tells the GPU where to re-converge divergent execution paths, we
+ // can ignore this when generating GLSL code.
+ break;
+ }
default: {
NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 254f6e2c3..c1e6fac9f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -39,6 +39,10 @@ void main() {
// Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy;
gl_Position = position;
+
+ // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
+ // For now, this is here to bring order in lieu of proper emulation
+ position.w = 1.0;
}
)";
out += program.first;
@@ -62,8 +66,6 @@ layout (std140) uniform fs_config {
vec4 viewport_flip;
};
-uniform sampler2D tex[32];
-
void main() {
exec_shader();
}
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 458032b5c..ed890e0f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -22,17 +22,28 @@ class ConstBufferEntry {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
public:
- void MarkAsUsed(unsigned index, unsigned offset, Maxwell::ShaderStage stage) {
+ void MarkAsUsed(u64 index, u64 offset, Maxwell::ShaderStage stage) {
is_used = true;
- this->index = index;
+ this->index = static_cast<unsigned>(index);
+ this->stage = stage;
+ max_offset = std::max(max_offset, static_cast<unsigned>(offset));
+ }
+
+ void MarkAsUsedIndirect(u64 index, Maxwell::ShaderStage stage) {
+ is_used = true;
+ is_indirect = true;
+ this->index = static_cast<unsigned>(index);
this->stage = stage;
- max_offset = std::max(max_offset, offset);
}
bool IsUsed() const {
return is_used;
}
+ bool IsIndirect() const {
+ return is_indirect;
+ }
+
unsigned GetIndex() const {
return index;
}
@@ -51,13 +62,54 @@ private:
};
bool is_used{};
+ bool is_indirect{};
unsigned index{};
unsigned max_offset{};
Maxwell::ShaderStage stage;
};
+class SamplerEntry {
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
+public:
+ SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
+ : offset(offset), stage(stage), sampler_index(index) {}
+
+ size_t GetOffset() const {
+ return offset;
+ }
+
+ size_t GetIndex() const {
+ return sampler_index;
+ }
+
+ Maxwell::ShaderStage GetStage() const {
+ return stage;
+ }
+
+ std::string GetName() const {
+ return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
+ std::to_string(sampler_index) + ']';
+ }
+
+ static std::string GetArrayName(Maxwell::ShaderStage stage) {
+ return TextureSamplerNames[static_cast<size_t>(stage)];
+ }
+
+private:
+ static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
+ "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
+ };
+ /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
+ /// instruction.
+ size_t offset;
+ Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
+ size_t sampler_index; ///< Value used to index into the generated GLSL sampler array.
+};
+
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffer_entries;
+ std::vector<SamplerEntry> texture_samplers;
};
using ProgramResult = std::pair<std::string, ShaderEntries>;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index ccdfc2718..d7167b298 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -32,33 +32,14 @@ void SetShaderUniformBlockBindings(GLuint shader) {
sizeof(MaxwellUniformData));
}
-void SetShaderSamplerBindings(GLuint shader) {
- OpenGLState cur_state = OpenGLState::GetCurState();
- GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
- cur_state.Apply();
-
- // Set the texture samplers to correspond to different texture units
- for (u32 texture = 0; texture < NumTextureSamplers; ++texture) {
- // Set the texture samplers to correspond to different texture units
- std::string uniform_name = "tex[" + std::to_string(texture) + "]";
- GLint uniform_tex = glGetUniformLocation(shader, uniform_name.c_str());
- if (uniform_tex != -1) {
- glUniform1i(uniform_tex, TextureUnits::MaxwellTexture(texture).id);
- }
- }
-
- cur_state.draw.shader_program = old_program;
- cur_state.Apply();
-}
-
} // namespace Impl
void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
// TODO(bunnei): Support more than one viewport
- viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0 : 1.0;
- viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0 : 1.0;
+ viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
+ viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
}
} // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index e963b4b7e..4295c20a6 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,7 +45,6 @@ public:
shader.Create(program_result.first.c_str(), type);
program.Create(true, shader.handle);
Impl::SetShaderUniformBlockBindings(program.handle);
- Impl::SetShaderSamplerBindings(program.handle);
entries = program_result.second;
}
GLuint GetHandle() const {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f91dfe36a..1f1e48425 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -50,6 +50,10 @@ OpenGLState::OpenGLState() {
for (auto& texture_unit : texture_units) {
texture_unit.texture_2d = 0;
texture_unit.sampler = 0;
+ texture_unit.swizzle.r = GL_RED;
+ texture_unit.swizzle.g = GL_GREEN;
+ texture_unit.swizzle.b = GL_BLUE;
+ texture_unit.swizzle.a = GL_ALPHA;
}
lighting_lut.texture_buffer = 0;
@@ -192,13 +196,22 @@ void OpenGLState::Apply() const {
}
// Textures
- for (size_t i = 0; i < std::size(texture_units); ++i) {
+ for (int i = 0; i < std::size(texture_units); ++i) {
if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
glActiveTexture(TextureUnits::MaxwellTexture(i).Enum());
glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
}
if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
- glBindSampler(i, texture_units[i].sampler);
+ glBindSampler(static_cast<GLuint>(i), texture_units[i].sampler);
+ }
+ // Update the texture swizzle
+ if (texture_units[i].swizzle.r != cur_state.texture_units[i].swizzle.r ||
+ texture_units[i].swizzle.g != cur_state.texture_units[i].swizzle.g ||
+ texture_units[i].swizzle.b != cur_state.texture_units[i].swizzle.b ||
+ texture_units[i].swizzle.a != cur_state.texture_units[i].swizzle.a) {
+ std::array<GLint, 4> mask = {texture_units[i].swizzle.r, texture_units[i].swizzle.g,
+ texture_units[i].swizzle.b, texture_units[i].swizzle.a};
+ glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 75c08e645..839e50e93 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -85,6 +85,12 @@ public:
struct {
GLuint texture_2d; // GL_TEXTURE_BINDING_2D
GLuint sampler; // GL_SAMPLER_BINDING
+ struct {
+ GLint r; // GL_TEXTURE_SWIZZLE_R
+ GLint g; // GL_TEXTURE_SWIZZLE_G
+ GLint b; // GL_TEXTURE_SWIZZLE_B
+ GLint a; // GL_TEXTURE_SWIZZLE_A
+ } swizzle;
} texture_units[32];
struct {
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index a630610d8..2155fb019 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -100,6 +100,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
switch (wrap_mode) {
case Tegra::Texture::WrapMode::Wrap:
return GL_REPEAT;
+ case Tegra::Texture::WrapMode::Mirror:
+ return GL_MIRRORED_REPEAT;
case Tegra::Texture::WrapMode::ClampToEdge:
return GL_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::ClampOGL:
@@ -178,4 +180,25 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
return {};
}
+inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) {
+ switch (source) {
+ case Tegra::Texture::SwizzleSource::Zero:
+ return GL_ZERO;
+ case Tegra::Texture::SwizzleSource::R:
+ return GL_RED;
+ case Tegra::Texture::SwizzleSource::G:
+ return GL_GREEN;
+ case Tegra::Texture::SwizzleSource::B:
+ return GL_BLUE;
+ case Tegra::Texture::SwizzleSource::A:
+ return GL_ALPHA;
+ case Tegra::Texture::SwizzleSource::OneInt:
+ case Tegra::Texture::SwizzleSource::OneFloat:
+ return GL_ONE;
+ }
+ NGLOG_CRITICAL(Render_OpenGL, "Unimplemented swizzle source={}", static_cast<u32>(source));
+ UNREACHABLE();
+ return {};
+}
+
} // namespace MaxwellToGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 3440d2190..f33766bfd 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -316,6 +316,7 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
}};
state.texture_units[0].texture_2d = screen_info.display_texture;
+ state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
state.Apply();
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
new file mode 100644
index 000000000..3c4ad1c9d
--- /dev/null
+++ b/src/video_core/textures/astc.cpp
@@ -0,0 +1,1646 @@
+// Copyright 2016 The University of North Carolina at Chapel Hill
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Please send all BUG REPORTS to <pavel@cs.unc.edu>.
+// <http://gamma.cs.unc.edu/FasTC/>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+#include "video_core/textures/astc.h"
+
+class BitStream {
+public:
+ BitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0)
+ : m_BitsWritten(0), m_BitsRead(0), m_NumBits(nBits), m_CurByte(ptr),
+ m_NextBit(start_offset % 8), done(false) {}
+
+ int GetBitsWritten() const {
+ return m_BitsWritten;
+ }
+
+ ~BitStream() {}
+
+ void WriteBitsR(unsigned int val, unsigned int nBits) {
+ for (unsigned int i = 0; i < nBits; i++) {
+ WriteBit((val >> (nBits - i - 1)) & 1);
+ }
+ }
+
+ void WriteBits(unsigned int val, unsigned int nBits) {
+ for (unsigned int i = 0; i < nBits; i++) {
+ WriteBit((val >> i) & 1);
+ }
+ }
+
+ int GetBitsRead() const {
+ return m_BitsRead;
+ }
+
+ int ReadBit() {
+
+ int bit = *m_CurByte >> m_NextBit++;
+ while (m_NextBit >= 8) {
+ m_NextBit -= 8;
+ m_CurByte++;
+ }
+
+ m_BitsRead++;
+ return bit & 1;
+ }
+
+ unsigned int ReadBits(unsigned int nBits) {
+ unsigned int ret = 0;
+ for (unsigned int i = 0; i < nBits; i++) {
+ ret |= (ReadBit() & 1) << i;
+ }
+ return ret;
+ }
+
+private:
+ void WriteBit(int b) {
+
+ if (done)
+ return;
+
+ const unsigned int mask = 1 << m_NextBit++;
+
+ // clear the bit
+ *m_CurByte &= ~mask;
+
+ // Write the bit, if necessary
+ if (b)
+ *m_CurByte |= mask;
+
+ // Next byte?
+ if (m_NextBit >= 8) {
+ m_CurByte += 1;
+ m_NextBit = 0;
+ }
+
+ done = done || ++m_BitsWritten >= m_NumBits;
+ }
+
+ int m_BitsWritten;
+ const int m_NumBits;
+ unsigned char* m_CurByte;
+ int m_NextBit;
+ int m_BitsRead;
+
+ bool done;
+};
+
+template <typename IntType>
+class Bits {
+private:
+ const IntType& m_Bits;
+
+ // Don't copy
+ Bits() {}
+ Bits(const Bits&) {}
+ Bits& operator=(const Bits&) {}
+
+public:
+ explicit Bits(IntType& v) : m_Bits(v) {}
+
+ uint8_t operator[](uint32_t bitPos) {
+ return static_cast<uint8_t>((m_Bits >> bitPos) & 1);
+ }
+
+ IntType operator()(uint32_t start, uint32_t end) {
+ if (start == end) {
+ return (*this)[start];
+ } else if (start > end) {
+ uint32_t t = start;
+ start = end;
+ end = t;
+ }
+
+ uint64_t mask = (1 << (end - start + 1)) - 1;
+ return (m_Bits >> start) & mask;
+ }
+};
+
+enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit };
+
+class IntegerEncodedValue {
+private:
+ const EIntegerEncoding m_Encoding;
+ const uint32_t m_NumBits;
+ uint32_t m_BitValue;
+ union {
+ uint32_t m_QuintValue;
+ uint32_t m_TritValue;
+ };
+
+public:
+ // Jank, but we're not doing any heavy lifting in this class, so it's
+ // probably OK. It allows us to use these in std::vectors...
+ IntegerEncodedValue& operator=(const IntegerEncodedValue& other) {
+ new (this) IntegerEncodedValue(other);
+ return *this;
+ }
+
+ IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits)
+ : m_Encoding(encoding), m_NumBits(numBits) {}
+
+ EIntegerEncoding GetEncoding() const {
+ return m_Encoding;
+ }
+ uint32_t BaseBitLength() const {
+ return m_NumBits;
+ }
+
+ uint32_t GetBitValue() const {
+ return m_BitValue;
+ }
+ void SetBitValue(uint32_t val) {
+ m_BitValue = val;
+ }
+
+ uint32_t GetTritValue() const {
+ return m_TritValue;
+ }
+ void SetTritValue(uint32_t val) {
+ m_TritValue = val;
+ }
+
+ uint32_t GetQuintValue() const {
+ return m_QuintValue;
+ }
+ void SetQuintValue(uint32_t val) {
+ m_QuintValue = val;
+ }
+
+ bool MatchesEncoding(const IntegerEncodedValue& other) {
+ return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits;
+ }
+
+ // Returns the number of bits required to encode nVals values.
+ uint32_t GetBitLength(uint32_t nVals) {
+ uint32_t totalBits = m_NumBits * nVals;
+ if (m_Encoding == eIntegerEncoding_Trit) {
+ totalBits += (nVals * 8 + 4) / 5;
+ } else if (m_Encoding == eIntegerEncoding_Quint) {
+ totalBits += (nVals * 7 + 2) / 3;
+ }
+ return totalBits;
+ }
+
+ // Count the number of bits set in a number.
+ static inline uint32_t Popcnt(uint32_t n) {
+ uint32_t c;
+ for (c = 0; n; c++) {
+ n &= n - 1;
+ }
+ return c;
+ }
+
+ // Returns a new instance of this struct that corresponds to the
+ // can take no more than maxval values
+ static IntegerEncodedValue CreateEncoding(uint32_t maxVal) {
+ while (maxVal > 0) {
+ uint32_t check = maxVal + 1;
+
+ // Is maxVal a power of two?
+ if (!(check & (check - 1))) {
+ return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal));
+ }
+
+ // Is maxVal of the type 3*2^n - 1?
+ if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) {
+ return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1));
+ }
+
+ // Is maxVal of the type 5*2^n - 1?
+ if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) {
+ return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1));
+ }
+
+ // Apparently it can't be represented with a bounded integer sequence...
+ // just iterate.
+ maxVal--;
+ }
+ return IntegerEncodedValue(eIntegerEncoding_JustBits, 0);
+ }
+
+ // Fills result with the values that are encoded in the given
+ // bitstream. We must know beforehand what the maximum possible
+ // value is, and how many values we're decoding.
+ static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, BitStream& bits,
+ uint32_t maxRange, uint32_t nValues) {
+ // Determine encoding parameters
+ IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange);
+
+ // Start decoding
+ uint32_t nValsDecoded = 0;
+ while (nValsDecoded < nValues) {
+ switch (val.GetEncoding()) {
+ case eIntegerEncoding_Quint:
+ DecodeQuintBlock(bits, result, val.BaseBitLength());
+ nValsDecoded += 3;
+ break;
+
+ case eIntegerEncoding_Trit:
+ DecodeTritBlock(bits, result, val.BaseBitLength());
+ nValsDecoded += 5;
+ break;
+
+ case eIntegerEncoding_JustBits:
+ val.SetBitValue(bits.ReadBits(val.BaseBitLength()));
+ result.push_back(val);
+ nValsDecoded++;
+ break;
+ }
+ }
+ }
+
+private:
+ static void DecodeTritBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+ uint32_t nBitsPerValue) {
+ // Implement the algorithm in section C.2.12
+ uint32_t m[5];
+ uint32_t t[5];
+ uint32_t T;
+
+ // Read the trit encoded block according to
+ // table C.2.14
+ m[0] = bits.ReadBits(nBitsPerValue);
+ T = bits.ReadBits(2);
+ m[1] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBits(2) << 2;
+ m[2] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBit() << 4;
+ m[3] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBits(2) << 5;
+ m[4] = bits.ReadBits(nBitsPerValue);
+ T |= bits.ReadBit() << 7;
+
+ uint32_t C = 0;
+
+ Bits<uint32_t> Tb(T);
+ if (Tb(2, 4) == 7) {
+ C = (Tb(5, 7) << 2) | Tb(0, 1);
+ t[4] = t[3] = 2;
+ } else {
+ C = Tb(0, 4);
+ if (Tb(5, 6) == 3) {
+ t[4] = 2;
+ t[3] = Tb[7];
+ } else {
+ t[4] = Tb[7];
+ t[3] = Tb(5, 6);
+ }
+ }
+
+ Bits<uint32_t> Cb(C);
+ if (Cb(0, 1) == 3) {
+ t[2] = 2;
+ t[1] = Cb[4];
+ t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]);
+ } else if (Cb(2, 3) == 3) {
+ t[2] = 2;
+ t[1] = 2;
+ t[0] = Cb(0, 1);
+ } else {
+ t[2] = Cb[4];
+ t[1] = Cb(2, 3);
+ t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]);
+ }
+
+ for (uint32_t i = 0; i < 5; i++) {
+ IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue);
+ val.SetBitValue(m[i]);
+ val.SetTritValue(t[i]);
+ result.push_back(val);
+ }
+ }
+
+ static void DecodeQuintBlock(BitStream& bits, std::vector<IntegerEncodedValue>& result,
+ uint32_t nBitsPerValue) {
+ // Implement the algorithm in section C.2.12
+ uint32_t m[3];
+ uint32_t q[3];
+ uint32_t Q;
+
+ // Read the trit encoded block according to
+ // table C.2.15
+ m[0] = bits.ReadBits(nBitsPerValue);
+ Q = bits.ReadBits(3);
+ m[1] = bits.ReadBits(nBitsPerValue);
+ Q |= bits.ReadBits(2) << 3;
+ m[2] = bits.ReadBits(nBitsPerValue);
+ Q |= bits.ReadBits(2) << 5;
+
+ Bits<uint32_t> Qb(Q);
+ if (Qb(1, 2) == 3 && Qb(5, 6) == 0) {
+ q[0] = q[1] = 4;
+ q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]);
+ } else {
+ uint32_t C = 0;
+ if (Qb(1, 2) == 3) {
+ q[2] = 4;
+ C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0];
+ } else {
+ q[2] = Qb(5, 6);
+ C = Qb(0, 4);
+ }
+
+ Bits<uint32_t> Cb(C);
+ if (Cb(0, 2) == 5) {
+ q[1] = 4;
+ q[0] = Cb(3, 4);
+ } else {
+ q[1] = Cb(3, 4);
+ q[0] = Cb(0, 2);
+ }
+ }
+
+ for (uint32_t i = 0; i < 3; i++) {
+ IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue);
+ val.m_BitValue = m[i];
+ val.m_QuintValue = q[i];
+ result.push_back(val);
+ }
+ }
+};
+
+namespace ASTCC {
+
+struct TexelWeightParams {
+ uint32_t m_Width;
+ uint32_t m_Height;
+ bool m_bDualPlane;
+ uint32_t m_MaxWeight;
+ bool m_bError;
+ bool m_bVoidExtentLDR;
+ bool m_bVoidExtentHDR;
+
+ TexelWeightParams() {
+ memset(this, 0, sizeof(*this));
+ }
+
+ uint32_t GetPackedBitSize() {
+ // How many indices do we have?
+ uint32_t nIdxs = m_Height * m_Width;
+ if (m_bDualPlane) {
+ nIdxs *= 2;
+ }
+
+ return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs);
+ }
+
+ uint32_t GetNumWeightValues() const {
+ uint32_t ret = m_Width * m_Height;
+ if (m_bDualPlane) {
+ ret *= 2;
+ }
+ return ret;
+ }
+};
+
+TexelWeightParams DecodeBlockInfo(BitStream& strm) {
+ TexelWeightParams params;
+
+ // Read the entire block mode all at once
+ uint16_t modeBits = strm.ReadBits(11);
+
+ // Does this match the void extent block mode?
+ if ((modeBits & 0x01FF) == 0x1FC) {
+ if (modeBits & 0x200) {
+ params.m_bVoidExtentHDR = true;
+ } else {
+ params.m_bVoidExtentLDR = true;
+ }
+
+ // Next two bits must be one.
+ if (!(modeBits & 0x400) || !strm.ReadBit()) {
+ params.m_bError = true;
+ }
+
+ return params;
+ }
+
+ // First check if the last four bits are zero
+ if ((modeBits & 0xF) == 0) {
+ params.m_bError = true;
+ return params;
+ }
+
+ // If the last two bits are zero, then if bits
+ // [6-8] are all ones, this is also reserved.
+ if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) {
+ params.m_bError = true;
+ return params;
+ }
+
+ // Otherwise, there is no error... Figure out the layout
+ // of the block mode. Layout is determined by a number
+ // between 0 and 9 corresponding to table C.2.8 of the
+ // ASTC spec.
+ uint32_t layout = 0;
+
+ if ((modeBits & 0x1) || (modeBits & 0x2)) {
+ // layout is in [0-4]
+ if (modeBits & 0x8) {
+ // layout is in [2-4]
+ if (modeBits & 0x4) {
+ // layout is in [3-4]
+ if (modeBits & 0x100) {
+ layout = 4;
+ } else {
+ layout = 3;
+ }
+ } else {
+ layout = 2;
+ }
+ } else {
+ // layout is in [0-1]
+ if (modeBits & 0x4) {
+ layout = 1;
+ } else {
+ layout = 0;
+ }
+ }
+ } else {
+ // layout is in [5-9]
+ if (modeBits & 0x100) {
+ // layout is in [7-9]
+ if (modeBits & 0x80) {
+ // layout is in [7-8]
+ assert((modeBits & 0x40) == 0U);
+ if (modeBits & 0x20) {
+ layout = 8;
+ } else {
+ layout = 7;
+ }
+ } else {
+ layout = 9;
+ }
+ } else {
+ // layout is in [5-6]
+ if (modeBits & 0x80) {
+ layout = 6;
+ } else {
+ layout = 5;
+ }
+ }
+ }
+
+ assert(layout < 10);
+
+ // Determine R
+ uint32_t R = !!(modeBits & 0x10);
+ if (layout < 5) {
+ R |= (modeBits & 0x3) << 1;
+ } else {
+ R |= (modeBits & 0xC) >> 1;
+ }
+ assert(2 <= R && R <= 7);
+
+ // Determine width & height
+ switch (layout) {
+ case 0: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ uint32_t B = (modeBits >> 7) & 0x3;
+ params.m_Width = B + 4;
+ params.m_Height = A + 2;
+ break;
+ }
+
+ case 1: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ uint32_t B = (modeBits >> 7) & 0x3;
+ params.m_Width = B + 8;
+ params.m_Height = A + 2;
+ break;
+ }
+
+ case 2: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ uint32_t B = (modeBits >> 7) & 0x3;
+ params.m_Width = A + 2;
+ params.m_Height = B + 8;
+ break;
+ }
+
+ case 3: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ uint32_t B = (modeBits >> 7) & 0x1;
+ params.m_Width = A + 2;
+ params.m_Height = B + 6;
+ break;
+ }
+
+ case 4: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ uint32_t B = (modeBits >> 7) & 0x1;
+ params.m_Width = B + 2;
+ params.m_Height = A + 2;
+ break;
+ }
+
+ case 5: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ params.m_Width = 12;
+ params.m_Height = A + 2;
+ break;
+ }
+
+ case 6: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ params.m_Width = A + 2;
+ params.m_Height = 12;
+ break;
+ }
+
+ case 7: {
+ params.m_Width = 6;
+ params.m_Height = 10;
+ break;
+ }
+
+ case 8: {
+ params.m_Width = 10;
+ params.m_Height = 6;
+ break;
+ }
+
+ case 9: {
+ uint32_t A = (modeBits >> 5) & 0x3;
+ uint32_t B = (modeBits >> 9) & 0x3;
+ params.m_Width = A + 6;
+ params.m_Height = B + 6;
+ break;
+ }
+
+ default:
+ assert(!"Don't know this layout...");
+ params.m_bError = true;
+ break;
+ }
+
+ // Determine whether or not we're using dual planes
+ // and/or high precision layouts.
+ bool D = (layout != 9) && (modeBits & 0x400);
+ bool H = (layout != 9) && (modeBits & 0x200);
+
+ if (H) {
+ const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31};
+ params.m_MaxWeight = maxWeights[R - 2];
+ } else {
+ const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7};
+ params.m_MaxWeight = maxWeights[R - 2];
+ }
+
+ params.m_bDualPlane = D;
+
+ return params;
+}
+
+void FillVoidExtentLDR(BitStream& strm, uint32_t* const outBuf, uint32_t blockWidth,
+ uint32_t blockHeight) {
+ // Don't actually care about the void extent, just read the bits...
+ for (int i = 0; i < 4; ++i) {
+ strm.ReadBits(13);
+ }
+
+ // Decode the RGBA components and renormalize them to the range [0, 255]
+ uint16_t r = strm.ReadBits(16);
+ uint16_t g = strm.ReadBits(16);
+ uint16_t b = strm.ReadBits(16);
+ uint16_t a = strm.ReadBits(16);
+
+ uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
+ (static_cast<uint32_t>(a) & 0xFF00) << 16;
+
+ for (uint32_t j = 0; j < blockHeight; j++)
+ for (uint32_t i = 0; i < blockWidth; i++) {
+ outBuf[j * blockWidth + i] = rgba;
+ }
+}
+
+void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) {
+ for (uint32_t j = 0; j < blockHeight; j++)
+ for (uint32_t i = 0; i < blockWidth; i++) {
+ outBuf[j * blockWidth + i] = 0xFFFF00FF;
+ }
+}
+
+// Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)]
+// is the same as [(numBits - 1):0] and repeats all the way down.
+template <typename IntType>
+IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
+ if (numBits == 0)
+ return 0;
+ if (toBit == 0)
+ return 0;
+ IntType v = val & ((1 << numBits) - 1);
+ IntType res = v;
+ uint32_t reslen = numBits;
+ while (reslen < toBit) {
+ uint32_t comp = 0;
+ if (numBits > toBit - reslen) {
+ uint32_t newshift = toBit - reslen;
+ comp = numBits - newshift;
+ numBits = newshift;
+ }
+ res <<= numBits;
+ res |= v >> comp;
+ reslen += numBits;
+ }
+ return res;
+}
+
+class Pixel {
+protected:
+ typedef int16_t ChannelType;
+ uint8_t m_BitDepth[4];
+ int16_t color[4];
+
+public:
+ Pixel() {
+ for (int i = 0; i < 4; i++) {
+ m_BitDepth[i] = 8;
+ color[i] = 0;
+ }
+ }
+
+ Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8) {
+ for (int i = 0; i < 4; i++)
+ m_BitDepth[i] = bitDepth;
+
+ color[0] = a;
+ color[1] = r;
+ color[2] = g;
+ color[3] = b;
+ }
+
+ // Changes the depth of each pixel. This scales the values to
+ // the appropriate bit depth by either truncating the least
+ // significant bits when going from larger to smaller bit depth
+ // or by repeating the most significant bits when going from
+ // smaller to larger bit depths.
+ void ChangeBitDepth(const uint8_t (&depth)[4]) {
+ for (uint32_t i = 0; i < 4; i++) {
+ Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]);
+ m_BitDepth[i] = depth[i];
+ }
+ }
+
+ template <typename IntType>
+ static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) {
+ float denominator = static_cast<float>((1 << bitDepth) - 1);
+ return static_cast<float>(channel) / denominator;
+ }
+
+ // Changes the bit depth of a single component. See the comment
+ // above for how we do this.
+ static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) {
+ assert(newDepth <= 8);
+ assert(oldDepth <= 8);
+
+ if (oldDepth == newDepth) {
+ // Do nothing
+ return val;
+ } else if (oldDepth == 0 && newDepth != 0) {
+ return (1 << newDepth) - 1;
+ } else if (newDepth > oldDepth) {
+ return Replicate(val, oldDepth, newDepth);
+ } else {
+ // oldDepth > newDepth
+ if (newDepth == 0) {
+ return 0xFF;
+ } else {
+ uint8_t bitsWasted = oldDepth - newDepth;
+ uint16_t v = static_cast<uint16_t>(val);
+ v = (v + (1 << (bitsWasted - 1))) >> bitsWasted;
+ v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1);
+ return static_cast<uint8_t>(v);
+ }
+ }
+
+ assert(!"We shouldn't get here.");
+ return 0;
+ }
+
+ const ChannelType& A() const {
+ return color[0];
+ }
+ ChannelType& A() {
+ return color[0];
+ }
+ const ChannelType& R() const {
+ return color[1];
+ }
+ ChannelType& R() {
+ return color[1];
+ }
+ const ChannelType& G() const {
+ return color[2];
+ }
+ ChannelType& G() {
+ return color[2];
+ }
+ const ChannelType& B() const {
+ return color[3];
+ }
+ ChannelType& B() {
+ return color[3];
+ }
+ const ChannelType& Component(uint32_t idx) const {
+ return color[idx];
+ }
+ ChannelType& Component(uint32_t idx) {
+ return color[idx];
+ }
+
+ void GetBitDepth(uint8_t (&outDepth)[4]) const {
+ for (int i = 0; i < 4; i++) {
+ outDepth[i] = m_BitDepth[i];
+ }
+ }
+
+ // Take all of the components, transform them to their 8-bit variants,
+ // and then pack each channel into an R8G8B8A8 32-bit integer. We assume
+ // that the architecture is little-endian, so the alpha channel will end
+ // up in the most-significant byte.
+ uint32_t Pack() const {
+ Pixel eightBit(*this);
+ const uint8_t eightBitDepth[4] = {8, 8, 8, 8};
+ eightBit.ChangeBitDepth(eightBitDepth);
+
+ uint32_t r = 0;
+ r |= eightBit.A();
+ r <<= 8;
+ r |= eightBit.B();
+ r <<= 8;
+ r |= eightBit.G();
+ r <<= 8;
+ r |= eightBit.R();
+ return r;
+ }
+
+ // Clamps the pixel to the range [0,255]
+ void ClampByte() {
+ for (uint32_t i = 0; i < 4; i++) {
+ color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]);
+ }
+ }
+
+ void MakeOpaque() {
+ A() = 255;
+ }
+};
+
+void DecodeColorValues(uint32_t* out, uint8_t* data, uint32_t* modes, const uint32_t nPartitions,
+ const uint32_t nBitsForColorData) {
+ // First figure out how many color values we have
+ uint32_t nValues = 0;
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ nValues += ((modes[i] >> 2) + 1) << 1;
+ }
+
+ // Then based on the number of values and the remaining number of bits,
+ // figure out the max value for each of them...
+ uint32_t range = 256;
+ while (--range > 0) {
+ IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range);
+ uint32_t bitLength = val.GetBitLength(nValues);
+ if (bitLength <= nBitsForColorData) {
+ // Find the smallest possible range that matches the given encoding
+ while (--range > 0) {
+ IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range);
+ if (!newval.MatchesEncoding(val)) {
+ break;
+ }
+ }
+
+ // Return to last matching range.
+ range++;
+ break;
+ }
+ }
+
+ // We now have enough to decode our integer sequence.
+ std::vector<IntegerEncodedValue> decodedColorValues;
+ BitStream colorStream(data);
+ IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues);
+
+ // Once we have the decoded values, we need to dequantize them to the 0-255 range
+ // This procedure is outlined in ASTC spec C.2.13
+ uint32_t outIdx = 0;
+ std::vector<IntegerEncodedValue>::const_iterator itr;
+ for (itr = decodedColorValues.begin(); itr != decodedColorValues.end(); itr++) {
+ // Have we already decoded all that we need?
+ if (outIdx >= nValues) {
+ break;
+ }
+
+ const IntegerEncodedValue& val = *itr;
+ uint32_t bitlen = val.BaseBitLength();
+ uint32_t bitval = val.GetBitValue();
+
+ assert(bitlen >= 1);
+
+ uint32_t A = 0, B = 0, C = 0, D = 0;
+ // A is just the lsb replicated 9 times.
+ A = Replicate(bitval & 1, 1, 9);
+
+ switch (val.GetEncoding()) {
+ // Replicate bits
+ case eIntegerEncoding_JustBits:
+ out[outIdx++] = Replicate(bitval, bitlen, 8);
+ break;
+
+ // Use algorithm in C.2.13
+ case eIntegerEncoding_Trit: {
+
+ D = val.GetTritValue();
+
+ switch (bitlen) {
+ case 1: {
+ C = 204;
+ } break;
+
+ case 2: {
+ C = 93;
+ // B = b000b0bb0
+ uint32_t b = (bitval >> 1) & 1;
+ B = (b << 8) | (b << 4) | (b << 2) | (b << 1);
+ } break;
+
+ case 3: {
+ C = 44;
+ // B = cb000cbcb
+ uint32_t cb = (bitval >> 1) & 3;
+ B = (cb << 7) | (cb << 2) | cb;
+ } break;
+
+ case 4: {
+ C = 22;
+ // B = dcb000dcb
+ uint32_t dcb = (bitval >> 1) & 7;
+ B = (dcb << 6) | dcb;
+ } break;
+
+ case 5: {
+ C = 11;
+ // B = edcb000ed
+ uint32_t edcb = (bitval >> 1) & 0xF;
+ B = (edcb << 5) | (edcb >> 2);
+ } break;
+
+ case 6: {
+ C = 5;
+ // B = fedcb000f
+ uint32_t fedcb = (bitval >> 1) & 0x1F;
+ B = (fedcb << 4) | (fedcb >> 4);
+ } break;
+
+ default:
+ assert(!"Unsupported trit encoding for color values!");
+ break;
+ } // switch(bitlen)
+ } // case eIntegerEncoding_Trit
+ break;
+
+ case eIntegerEncoding_Quint: {
+
+ D = val.GetQuintValue();
+
+ switch (bitlen) {
+ case 1: {
+ C = 113;
+ } break;
+
+ case 2: {
+ C = 54;
+ // B = b0000bb00
+ uint32_t b = (bitval >> 1) & 1;
+ B = (b << 8) | (b << 3) | (b << 2);
+ } break;
+
+ case 3: {
+ C = 26;
+ // B = cb0000cbc
+ uint32_t cb = (bitval >> 1) & 3;
+ B = (cb << 7) | (cb << 1) | (cb >> 1);
+ } break;
+
+ case 4: {
+ C = 13;
+ // B = dcb0000dc
+ uint32_t dcb = (bitval >> 1) & 7;
+ B = (dcb << 6) | (dcb >> 1);
+ } break;
+
+ case 5: {
+ C = 6;
+ // B = edcb0000e
+ uint32_t edcb = (bitval >> 1) & 0xF;
+ B = (edcb << 5) | (edcb >> 3);
+ } break;
+
+ default:
+ assert(!"Unsupported quint encoding for color values!");
+ break;
+ } // switch(bitlen)
+ } // case eIntegerEncoding_Quint
+ break;
+ } // switch(val.GetEncoding())
+
+ if (val.GetEncoding() != eIntegerEncoding_JustBits) {
+ uint32_t T = D * C + B;
+ T ^= A;
+ T = (A & 0x80) | (T >> 2);
+ out[outIdx++] = T;
+ }
+ }
+
+ // Make sure that each of our values is in the proper range...
+ for (uint32_t i = 0; i < nValues; i++) {
+ assert(out[i] <= 255);
+ }
+}
+
+uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) {
+ uint32_t bitval = val.GetBitValue();
+ uint32_t bitlen = val.BaseBitLength();
+
+ uint32_t A = Replicate(bitval & 1, 1, 7);
+ uint32_t B = 0, C = 0, D = 0;
+
+ uint32_t result = 0;
+ switch (val.GetEncoding()) {
+ case eIntegerEncoding_JustBits:
+ result = Replicate(bitval, bitlen, 6);
+ break;
+
+ case eIntegerEncoding_Trit: {
+ D = val.GetTritValue();
+ assert(D < 3);
+
+ switch (bitlen) {
+ case 0: {
+ uint32_t results[3] = {0, 32, 63};
+ result = results[D];
+ } break;
+
+ case 1: {
+ C = 50;
+ } break;
+
+ case 2: {
+ C = 23;
+ uint32_t b = (bitval >> 1) & 1;
+ B = (b << 6) | (b << 2) | b;
+ } break;
+
+ case 3: {
+ C = 11;
+ uint32_t cb = (bitval >> 1) & 3;
+ B = (cb << 5) | cb;
+ } break;
+
+ default:
+ assert(!"Invalid trit encoding for texel weight");
+ break;
+ }
+ } break;
+
+ case eIntegerEncoding_Quint: {
+ D = val.GetQuintValue();
+ assert(D < 5);
+
+ switch (bitlen) {
+ case 0: {
+ uint32_t results[5] = {0, 16, 32, 47, 63};
+ result = results[D];
+ } break;
+
+ case 1: {
+ C = 28;
+ } break;
+
+ case 2: {
+ C = 13;
+ uint32_t b = (bitval >> 1) & 1;
+ B = (b << 6) | (b << 1);
+ } break;
+
+ default:
+ assert(!"Invalid quint encoding for texel weight");
+ break;
+ }
+ } break;
+ }
+
+ if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) {
+ // Decode the value...
+ result = D * C + B;
+ result ^= A;
+ result = (A & 0x20) | (result >> 2);
+ }
+
+ assert(result < 64);
+
+ // Change from [0,63] to [0,64]
+ if (result > 32) {
+ result += 1;
+ }
+
+ return result;
+}
+
+void UnquantizeTexelWeights(uint32_t out[2][144], std::vector<IntegerEncodedValue>& weights,
+ const TexelWeightParams& params, const uint32_t blockWidth,
+ const uint32_t blockHeight) {
+ uint32_t weightIdx = 0;
+ uint32_t unquantized[2][144];
+ std::vector<IntegerEncodedValue>::const_iterator itr;
+ for (itr = weights.begin(); itr != weights.end(); itr++) {
+ unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr);
+
+ if (params.m_bDualPlane) {
+ itr++;
+ unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr);
+ if (itr == weights.end()) {
+ break;
+ }
+ }
+
+ if (++weightIdx >= (params.m_Width * params.m_Height))
+ break;
+ }
+
+ // Do infill if necessary (Section C.2.18) ...
+ uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1);
+ uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1);
+
+ const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U;
+ for (uint32_t plane = 0; plane < kPlaneScale; plane++)
+ for (uint32_t t = 0; t < blockHeight; t++)
+ for (uint32_t s = 0; s < blockWidth; s++) {
+ uint32_t cs = Ds * s;
+ uint32_t ct = Dt * t;
+
+ uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6;
+ uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6;
+
+ uint32_t js = gs >> 4;
+ uint32_t fs = gs & 0xF;
+
+ uint32_t jt = gt >> 4;
+ uint32_t ft = gt & 0x0F;
+
+ uint32_t w11 = (fs * ft + 8) >> 4;
+ uint32_t w10 = ft - w11;
+ uint32_t w01 = fs - w11;
+ uint32_t w00 = 16 - fs - ft + w11;
+
+ uint32_t v0 = js + jt * params.m_Width;
+
+#define FIND_TEXEL(tidx, bidx) \
+ uint32_t p##bidx = 0; \
+ do { \
+ if ((tidx) < (params.m_Width * params.m_Height)) { \
+ p##bidx = unquantized[plane][(tidx)]; \
+ } \
+ } while (0)
+
+ FIND_TEXEL(v0, 00);
+ FIND_TEXEL(v0 + 1, 01);
+ FIND_TEXEL(v0 + params.m_Width, 10);
+ FIND_TEXEL(v0 + params.m_Width + 1, 11);
+
+#undef FIND_TEXEL
+
+ out[plane][t * blockWidth + s] =
+ (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4;
+ }
+}
+
+// Transfers a bit as described in C.2.14
+static inline void BitTransferSigned(int32_t& a, int32_t& b) {
+ b >>= 1;
+ b |= a & 0x80;
+ a >>= 1;
+ a &= 0x3F;
+ if (a & 0x20)
+ a -= 0x40;
+}
+
+// Adds more precision to the blue channel as described
+// in C.2.14
+static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) {
+ return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1),
+ static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b));
+}
+
+// Partition selection functions as specified in
+// C.2.21
+static inline uint32_t hash52(uint32_t p) {
+ p ^= p >> 15;
+ p -= p << 17;
+ p += p << 7;
+ p += p << 4;
+ p ^= p >> 5;
+ p += p << 16;
+ p ^= p >> 7;
+ p ^= p >> 3;
+ p ^= p << 6;
+ p ^= p >> 17;
+ return p;
+}
+
+static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
+ int32_t partitionCount, int32_t smallBlock) {
+ if (1 == partitionCount)
+ return 0;
+
+ if (smallBlock) {
+ x <<= 1;
+ y <<= 1;
+ z <<= 1;
+ }
+
+ seed += (partitionCount - 1) * 1024;
+
+ uint32_t rnum = hash52(static_cast<uint32_t>(seed));
+ uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF);
+ uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF);
+ uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF);
+ uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF);
+ uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF);
+ uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF);
+ uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF);
+ uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF);
+ uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF);
+ uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF);
+ uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
+ uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
+
+ seed1 *= seed1;
+ seed2 *= seed2;
+ seed3 *= seed3;
+ seed4 *= seed4;
+ seed5 *= seed5;
+ seed6 *= seed6;
+ seed7 *= seed7;
+ seed8 *= seed8;
+ seed9 *= seed9;
+ seed10 *= seed10;
+ seed11 *= seed11;
+ seed12 *= seed12;
+
+ int32_t sh1, sh2, sh3;
+ if (seed & 1) {
+ sh1 = (seed & 2) ? 4 : 5;
+ sh2 = (partitionCount == 3) ? 6 : 5;
+ } else {
+ sh1 = (partitionCount == 3) ? 6 : 5;
+ sh2 = (seed & 2) ? 4 : 5;
+ }
+ sh3 = (seed & 0x10) ? sh1 : sh2;
+
+ seed1 >>= sh1;
+ seed2 >>= sh2;
+ seed3 >>= sh1;
+ seed4 >>= sh2;
+ seed5 >>= sh1;
+ seed6 >>= sh2;
+ seed7 >>= sh1;
+ seed8 >>= sh2;
+ seed9 >>= sh3;
+ seed10 >>= sh3;
+ seed11 >>= sh3;
+ seed12 >>= sh3;
+
+ int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
+ int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
+ int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
+ int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
+
+ a &= 0x3F;
+ b &= 0x3F;
+ c &= 0x3F;
+ d &= 0x3F;
+
+ if (partitionCount < 4)
+ d = 0;
+ if (partitionCount < 3)
+ c = 0;
+
+ if (a >= b && a >= c && a >= d)
+ return 0;
+ else if (b >= c && b >= d)
+ return 1;
+ else if (c >= d)
+ return 2;
+ return 3;
+}
+
+static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount,
+ int32_t smallBlock) {
+ return SelectPartition(seed, x, y, 0, partitionCount, smallBlock);
+}
+
+// Section C.2.14
+void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues,
+ uint32_t colorEndpointMode) {
+#define READ_UINT_VALUES(N) \
+ uint32_t v[N]; \
+ for (uint32_t i = 0; i < N; i++) { \
+ v[i] = *(colorValues++); \
+ }
+
+#define READ_INT_VALUES(N) \
+ int32_t v[N]; \
+ for (uint32_t i = 0; i < N; i++) { \
+ v[i] = static_cast<int32_t>(*(colorValues++)); \
+ }
+
+ switch (colorEndpointMode) {
+ case 0: {
+ READ_UINT_VALUES(2)
+ ep1 = Pixel(0xFF, v[0], v[0], v[0]);
+ ep2 = Pixel(0xFF, v[1], v[1], v[1]);
+ } break;
+
+ case 1: {
+ READ_UINT_VALUES(2)
+ uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0);
+ uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU);
+ ep1 = Pixel(0xFF, L0, L0, L0);
+ ep2 = Pixel(0xFF, L1, L1, L1);
+ } break;
+
+ case 4: {
+ READ_UINT_VALUES(4)
+ ep1 = Pixel(v[2], v[0], v[0], v[0]);
+ ep2 = Pixel(v[3], v[1], v[1], v[1]);
+ } break;
+
+ case 5: {
+ READ_INT_VALUES(4)
+ BitTransferSigned(v[1], v[0]);
+ BitTransferSigned(v[3], v[2]);
+ ep1 = Pixel(v[2], v[0], v[0], v[0]);
+ ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]);
+ ep1.ClampByte();
+ ep2.ClampByte();
+ } break;
+
+ case 6: {
+ READ_UINT_VALUES(4)
+ ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
+ ep2 = Pixel(0xFF, v[0], v[1], v[2]);
+ } break;
+
+ case 8: {
+ READ_UINT_VALUES(6)
+ if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
+ ep1 = Pixel(0xFF, v[0], v[2], v[4]);
+ ep2 = Pixel(0xFF, v[1], v[3], v[5]);
+ } else {
+ ep1 = BlueContract(0xFF, v[1], v[3], v[5]);
+ ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
+ }
+ } break;
+
+ case 9: {
+ READ_INT_VALUES(6)
+ BitTransferSigned(v[1], v[0]);
+ BitTransferSigned(v[3], v[2]);
+ BitTransferSigned(v[5], v[4]);
+ if (v[1] + v[3] + v[5] >= 0) {
+ ep1 = Pixel(0xFF, v[0], v[2], v[4]);
+ ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
+ } else {
+ ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]);
+ ep2 = BlueContract(0xFF, v[0], v[2], v[4]);
+ }
+ ep1.ClampByte();
+ ep2.ClampByte();
+ } break;
+
+ case 10: {
+ READ_UINT_VALUES(6)
+ ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8);
+ ep2 = Pixel(v[5], v[0], v[1], v[2]);
+ } break;
+
+ case 12: {
+ READ_UINT_VALUES(8)
+ if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) {
+ ep1 = Pixel(v[6], v[0], v[2], v[4]);
+ ep2 = Pixel(v[7], v[1], v[3], v[5]);
+ } else {
+ ep1 = BlueContract(v[7], v[1], v[3], v[5]);
+ ep2 = BlueContract(v[6], v[0], v[2], v[4]);
+ }
+ } break;
+
+ case 13: {
+ READ_INT_VALUES(8)
+ BitTransferSigned(v[1], v[0]);
+ BitTransferSigned(v[3], v[2]);
+ BitTransferSigned(v[5], v[4]);
+ BitTransferSigned(v[7], v[6]);
+ if (v[1] + v[3] + v[5] >= 0) {
+ ep1 = Pixel(v[6], v[0], v[2], v[4]);
+ ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
+ } else {
+ ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]);
+ ep2 = BlueContract(v[6], v[0], v[2], v[4]);
+ }
+ ep1.ClampByte();
+ ep2.ClampByte();
+ } break;
+
+ default:
+ assert(!"Unsupported color endpoint mode (is it HDR?)");
+ break;
+ }
+
+#undef READ_UINT_VALUES
+#undef READ_INT_VALUES
+}
+
+void DecompressBlock(uint8_t inBuf[16], const uint32_t blockWidth, const uint32_t blockHeight,
+ uint32_t* outBuf) {
+ BitStream strm(inBuf);
+ TexelWeightParams weightParams = DecodeBlockInfo(strm);
+
+ // Was there an error?
+ if (weightParams.m_bError) {
+ assert(!"Invalid block mode");
+ FillError(outBuf, blockWidth, blockHeight);
+ return;
+ }
+
+ if (weightParams.m_bVoidExtentLDR) {
+ FillVoidExtentLDR(strm, outBuf, blockWidth, blockHeight);
+ return;
+ }
+
+ if (weightParams.m_bVoidExtentHDR) {
+ assert(!"HDR void extent blocks are unsupported!");
+ FillError(outBuf, blockWidth, blockHeight);
+ return;
+ }
+
+ if (weightParams.m_Width > blockWidth) {
+ assert(!"Texel weight grid width should be smaller than block width");
+ FillError(outBuf, blockWidth, blockHeight);
+ return;
+ }
+
+ if (weightParams.m_Height > blockHeight) {
+ assert(!"Texel weight grid height should be smaller than block height");
+ FillError(outBuf, blockWidth, blockHeight);
+ return;
+ }
+
+ // Read num partitions
+ uint32_t nPartitions = strm.ReadBits(2) + 1;
+ assert(nPartitions <= 4);
+
+ if (nPartitions == 4 && weightParams.m_bDualPlane) {
+ assert(!"Dual plane mode is incompatible with four partition blocks");
+ FillError(outBuf, blockWidth, blockHeight);
+ return;
+ }
+
+ // Based on the number of partitions, read the color endpoint mode for
+ // each partition.
+
+ // Determine partitions, partition index, and color endpoint modes
+ int32_t planeIdx = -1;
+ uint32_t partitionIndex;
+ uint32_t colorEndpointMode[4] = {0, 0, 0, 0};
+
+ // Define color data.
+ uint8_t colorEndpointData[16];
+ memset(colorEndpointData, 0, sizeof(colorEndpointData));
+ BitStream colorEndpointStream(colorEndpointData, 16 * 8, 0);
+
+ // Read extra config data...
+ uint32_t baseCEM = 0;
+ if (nPartitions == 1) {
+ colorEndpointMode[0] = strm.ReadBits(4);
+ partitionIndex = 0;
+ } else {
+ partitionIndex = strm.ReadBits(10);
+ baseCEM = strm.ReadBits(6);
+ }
+ uint32_t baseMode = (baseCEM & 3);
+
+ // Remaining bits are color endpoint data...
+ uint32_t nWeightBits = weightParams.GetPackedBitSize();
+ int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead();
+
+ // Consider extra bits prior to texel data...
+ uint32_t extraCEMbits = 0;
+ if (baseMode) {
+ switch (nPartitions) {
+ case 2:
+ extraCEMbits += 2;
+ break;
+ case 3:
+ extraCEMbits += 5;
+ break;
+ case 4:
+ extraCEMbits += 8;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+ remainingBits -= extraCEMbits;
+
+ // Do we have a dual plane situation?
+ uint32_t planeSelectorBits = 0;
+ if (weightParams.m_bDualPlane) {
+ planeSelectorBits = 2;
+ }
+ remainingBits -= planeSelectorBits;
+
+ // Read color data...
+ uint32_t colorDataBits = remainingBits;
+ while (remainingBits > 0) {
+ uint32_t nb = std::min(remainingBits, 8);
+ uint32_t b = strm.ReadBits(nb);
+ colorEndpointStream.WriteBits(b, nb);
+ remainingBits -= 8;
+ }
+
+ // Read the plane selection bits
+ planeIdx = strm.ReadBits(planeSelectorBits);
+
+ // Read the rest of the CEM
+ if (baseMode) {
+ uint32_t extraCEM = strm.ReadBits(extraCEMbits);
+ uint32_t CEM = (extraCEM << 6) | baseCEM;
+ CEM >>= 2;
+
+ bool C[4] = {0};
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ C[i] = CEM & 1;
+ CEM >>= 1;
+ }
+
+ uint8_t M[4] = {0};
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ M[i] = CEM & 3;
+ CEM >>= 2;
+ assert(M[i] <= 3);
+ }
+
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ colorEndpointMode[i] = baseMode;
+ if (!(C[i]))
+ colorEndpointMode[i] -= 1;
+ colorEndpointMode[i] <<= 2;
+ colorEndpointMode[i] |= M[i];
+ }
+ } else if (nPartitions > 1) {
+ uint32_t CEM = baseCEM >> 2;
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ colorEndpointMode[i] = CEM;
+ }
+ }
+
+ // Make sure everything up till here is sane.
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ assert(colorEndpointMode[i] < 16);
+ }
+ assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128);
+
+ // Decode both color data and texel weight data
+ uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions
+ DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions,
+ colorDataBits);
+
+ Pixel endpoints[4][2];
+ const uint32_t* colorValuesPtr = colorValues;
+ for (uint32_t i = 0; i < nPartitions; i++) {
+ ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]);
+ }
+
+ // Read the texel weight data..
+ uint8_t texelWeightData[16];
+ memcpy(texelWeightData, inBuf, sizeof(texelWeightData));
+
+ // Reverse everything
+ for (uint32_t i = 0; i < 8; i++) {
+// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
+#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32
+ unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i]));
+ unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i]));
+#undef REVERSE_BYTE
+
+ texelWeightData[i] = b;
+ texelWeightData[15 - i] = a;
+ }
+
+ // Make sure that higher non-texel bits are set to zero
+ const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
+ texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1;
+ memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
+
+ std::vector<IntegerEncodedValue> texelWeightValues;
+ BitStream weightStream(texelWeightData);
+
+ IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream,
+ weightParams.m_MaxWeight,
+ weightParams.GetNumWeightValues());
+
+ // Blocks can be at most 12x12, so we can have as many as 144 weights
+ uint32_t weights[2][144];
+ UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight);
+
+ // Now that we have endpoints and weights, we can interpolate and generate
+ // the proper decoding...
+ for (uint32_t j = 0; j < blockHeight; j++)
+ for (uint32_t i = 0; i < blockWidth; i++) {
+ uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions,
+ (blockHeight * blockWidth) < 32);
+ assert(partition < nPartitions);
+
+ Pixel p;
+ for (uint32_t c = 0; c < 4; c++) {
+ uint32_t C0 = endpoints[partition][0].Component(c);
+ C0 = Replicate(C0, 8, 16);
+ uint32_t C1 = endpoints[partition][1].Component(c);
+ C1 = Replicate(C1, 8, 16);
+
+ uint32_t plane = 0;
+ if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) {
+ plane = 1;
+ }
+
+ uint32_t weight = weights[plane][j * blockWidth + i];
+ uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64;
+ if (C == 65535) {
+ p.Component(c) = 255;
+ } else {
+ double Cf = static_cast<double>(C);
+ p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5);
+ }
+ }
+
+ outBuf[j * blockWidth + i] = p.Pack();
+ }
+}
+
+} // namespace ASTCC
+
+namespace Tegra::Texture::ASTC {
+
+std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+ uint32_t block_width, uint32_t block_height) {
+ uint32_t blockIdx = 0;
+ std::vector<uint8_t> outData;
+ outData.resize(height * width * 4);
+ for (uint32_t j = 0; j < height; j += block_height) {
+ for (uint32_t i = 0; i < width; i += block_width) {
+
+ uint8_t* blockPtr = data.data() + blockIdx * 16;
+
+ // Blocks can be at most 12x12
+ uint32_t uncompData[144];
+ ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData);
+
+ uint32_t decompWidth = std::min(block_width, width - i);
+ uint32_t decompHeight = std::min(block_height, height - j);
+
+ uint8_t* outRow = outData.data() + (j * width + i) * 4;
+ for (uint32_t jj = 0; jj < decompHeight; jj++) {
+ memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4);
+ }
+
+ blockIdx++;
+ }
+ }
+
+ return outData;
+}
+
+} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h
new file mode 100644
index 000000000..f0d7c0e56
--- /dev/null
+++ b/src/video_core/textures/astc.h
@@ -0,0 +1,15 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+namespace Tegra::Texture::ASTC {
+
+std::vector<uint8_t> Decompress(std::vector<uint8_t>& data, uint32_t width, uint32_t height,
+ uint32_t block_width, uint32_t block_height);
+
+} // namespace Tegra::Texture::ASTC
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2d2af5554..0db4367f1 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -53,8 +53,10 @@ u32 BytesPerPixel(TextureFormat format) {
case TextureFormat::DXT45:
// In this case a 'pixel' actually refers to a 4x4 tile.
return 16;
+ case TextureFormat::ASTC_2D_4X4:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
+ case TextureFormat::BF10GF11RF11:
return 4;
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
@@ -92,6 +94,8 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
case TextureFormat::B5G6R5:
case TextureFormat::R8:
case TextureFormat::R16_G16_B16_A16:
+ case TextureFormat::BF10GF11RF11:
+ case TextureFormat::ASTC_2D_4X4:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;
@@ -113,11 +117,13 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
case TextureFormat::DXT23:
case TextureFormat::DXT45:
case TextureFormat::DXN1:
+ case TextureFormat::ASTC_2D_4X4:
case TextureFormat::A8R8G8B8:
case TextureFormat::A2B10G10R10:
case TextureFormat::A1B5G5R5:
case TextureFormat::B5G6R5:
case TextureFormat::R8:
+ case TextureFormat::BF10GF11RF11:
// TODO(Subv): For the time being just forward the same data without any decoding.
rgba_data = texture_data;
break;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index f48ca30b8..a17eaf19d 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -122,6 +122,17 @@ enum class ComponentType : u32 {
FLOAT = 7
};
+enum class SwizzleSource : u32 {
+ Zero = 0,
+
+ R = 2,
+ G = 3,
+ B = 4,
+ A = 5,
+ OneInt = 6,
+ OneFloat = 7,
+};
+
union TextureHandle {
u32 raw;
BitField<0, 20, u32> tic_id;
@@ -139,6 +150,11 @@ struct TICEntry {
BitField<10, 3, ComponentType> g_type;
BitField<13, 3, ComponentType> b_type;
BitField<16, 3, ComponentType> a_type;
+
+ BitField<19, 3, SwizzleSource> x_source;
+ BitField<22, 3, SwizzleSource> y_source;
+ BitField<25, 3, SwizzleSource> z_source;
+ BitField<28, 3, SwizzleSource> w_source;
};
u32 address_low;
union {
diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt
index 5af3154d7..c662570d2 100644
--- a/src/yuzu/CMakeLists.txt
+++ b/src/yuzu/CMakeLists.txt
@@ -32,8 +32,6 @@ add_executable(yuzu
debugger/graphics/graphics_surface.h
debugger/profiler.cpp
debugger/profiler.h
- debugger/registers.cpp
- debugger/registers.h
debugger/wait_tree.cpp
debugger/wait_tree.h
game_list.cpp
@@ -60,7 +58,6 @@ set(UIS
configuration/configure_graphics.ui
configuration/configure_input.ui
configuration/configure_system.ui
- debugger/registers.ui
hotkeys.ui
main.ui
)
diff --git a/src/yuzu/debugger/registers.cpp b/src/yuzu/debugger/registers.cpp
deleted file mode 100644
index 178cc65a7..000000000
--- a/src/yuzu/debugger/registers.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <QTreeWidgetItem>
-#include "core/arm/arm_interface.h"
-#include "core/core.h"
-#include "yuzu/debugger/registers.h"
-#include "yuzu/util/util.h"
-
-RegistersWidget::RegistersWidget(QWidget* parent) : QDockWidget(parent) {
- cpu_regs_ui.setupUi(this);
-
- tree = cpu_regs_ui.treeWidget;
- tree->addTopLevelItem(core_registers = new QTreeWidgetItem(QStringList(tr("Registers"))));
- tree->addTopLevelItem(vfp_registers = new QTreeWidgetItem(QStringList(tr("VFP Registers"))));
- tree->addTopLevelItem(vfp_system_registers =
- new QTreeWidgetItem(QStringList(tr("VFP System Registers"))));
- tree->addTopLevelItem(cpsr = new QTreeWidgetItem(QStringList("CPSR")));
-
- for (int i = 0; i < 16; ++i) {
- QTreeWidgetItem* child = new QTreeWidgetItem(QStringList(QString("R[%1]").arg(i)));
- core_registers->addChild(child);
- }
-
- for (int i = 0; i < 32; ++i) {
- QTreeWidgetItem* child = new QTreeWidgetItem(QStringList(QString("S[%1]").arg(i)));
- vfp_registers->addChild(child);
- }
-
- QFont font = GetMonospaceFont();
-
- CreateCPSRChildren();
- CreateVFPSystemRegisterChildren();
-
- // Set Registers to display in monospace font
- for (int i = 0; i < core_registers->childCount(); ++i)
- core_registers->child(i)->setFont(1, font);
-
- for (int i = 0; i < vfp_registers->childCount(); ++i)
- vfp_registers->child(i)->setFont(1, font);
-
- for (int i = 0; i < vfp_system_registers->childCount(); ++i) {
- vfp_system_registers->child(i)->setFont(1, font);
- for (int x = 0; x < vfp_system_registers->child(i)->childCount(); ++x) {
- vfp_system_registers->child(i)->child(x)->setFont(1, font);
- }
- }
- // Set CSPR to display in monospace font
- cpsr->setFont(1, font);
- for (int i = 0; i < cpsr->childCount(); ++i) {
- cpsr->child(i)->setFont(1, font);
- for (int x = 0; x < cpsr->child(i)->childCount(); ++x) {
- cpsr->child(i)->child(x)->setFont(1, font);
- }
- }
- setEnabled(false);
-}
-
-void RegistersWidget::OnDebugModeEntered() {
- if (!Core::System::GetInstance().IsPoweredOn())
- return;
-
- for (int i = 0; i < core_registers->childCount(); ++i)
- core_registers->child(i)->setText(
- 1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));
-
- UpdateCPSRValues();
-}
-
-void RegistersWidget::OnDebugModeLeft() {}
-
-void RegistersWidget::OnEmulationStarting(EmuThread* emu_thread) {
- setEnabled(true);
-}
-
-void RegistersWidget::OnEmulationStopping() {
- // Reset widget text
- for (int i = 0; i < core_registers->childCount(); ++i)
- core_registers->child(i)->setText(1, QString(""));
-
- for (int i = 0; i < vfp_registers->childCount(); ++i)
- vfp_registers->child(i)->setText(1, QString(""));
-
- for (int i = 0; i < cpsr->childCount(); ++i)
- cpsr->child(i)->setText(1, QString(""));
-
- cpsr->setText(1, QString(""));
-
- // FPSCR
- for (int i = 0; i < vfp_system_registers->child(0)->childCount(); ++i)
- vfp_system_registers->child(0)->child(i)->setText(1, QString(""));
-
- // FPEXC
- for (int i = 0; i < vfp_system_registers->child(1)->childCount(); ++i)
- vfp_system_registers->child(1)->child(i)->setText(1, QString(""));
-
- vfp_system_registers->child(0)->setText(1, QString(""));
- vfp_system_registers->child(1)->setText(1, QString(""));
- vfp_system_registers->child(2)->setText(1, QString(""));
- vfp_system_registers->child(3)->setText(1, QString(""));
-
- setEnabled(false);
-}
-
-void RegistersWidget::CreateCPSRChildren() {
- cpsr->addChild(new QTreeWidgetItem(QStringList("M")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("T")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("F")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("I")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("A")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("E")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("IT")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("GE")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("DNM")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("J")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("Q")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("V")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("C")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("Z")));
- cpsr->addChild(new QTreeWidgetItem(QStringList("N")));
-}
-
-void RegistersWidget::UpdateCPSRValues() {
- const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();
-
- cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
- cpsr->child(0)->setText(
- 1, QString("b%1").arg(cpsr_val & 0x1F, 5, 2, QLatin1Char('0'))); // M - Mode
- cpsr->child(1)->setText(1, QString::number((cpsr_val >> 5) & 1)); // T - State
- cpsr->child(2)->setText(1, QString::number((cpsr_val >> 6) & 1)); // F - FIQ disable
- cpsr->child(3)->setText(1, QString::number((cpsr_val >> 7) & 1)); // I - IRQ disable
- cpsr->child(4)->setText(1, QString::number((cpsr_val >> 8) & 1)); // A - Imprecise abort
- cpsr->child(5)->setText(1, QString::number((cpsr_val >> 9) & 1)); // E - Data endianness
- cpsr->child(6)->setText(1,
- QString::number((cpsr_val >> 10) & 0x3F)); // IT - If-Then state (DNM)
- cpsr->child(7)->setText(1,
- QString::number((cpsr_val >> 16) & 0xF)); // GE - Greater-than-or-Equal
- cpsr->child(8)->setText(1, QString::number((cpsr_val >> 20) & 0xF)); // DNM - Do not modify
- cpsr->child(9)->setText(1, QString::number((cpsr_val >> 24) & 1)); // J - Jazelle
- cpsr->child(10)->setText(1, QString::number((cpsr_val >> 27) & 1)); // Q - Saturation
- cpsr->child(11)->setText(1, QString::number((cpsr_val >> 28) & 1)); // V - Overflow
- cpsr->child(12)->setText(1, QString::number((cpsr_val >> 29) & 1)); // C - Carry/Borrow/Extend
- cpsr->child(13)->setText(1, QString::number((cpsr_val >> 30) & 1)); // Z - Zero
- cpsr->child(14)->setText(1, QString::number((cpsr_val >> 31) & 1)); // N - Negative/Less than
-}
-
-void RegistersWidget::CreateVFPSystemRegisterChildren() {
- QTreeWidgetItem* const fpscr = new QTreeWidgetItem(QStringList("FPSCR"));
- fpscr->addChild(new QTreeWidgetItem(QStringList("IOC")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("DZC")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("OFC")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("UFC")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("IXC")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("IDC")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("IOE")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("DZE")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("OFE")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("UFE")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("IXE")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("IDE")));
- fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Vector Length"))));
- fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Vector Stride"))));
- fpscr->addChild(new QTreeWidgetItem(QStringList(tr("Rounding Mode"))));
- fpscr->addChild(new QTreeWidgetItem(QStringList("FZ")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("DN")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("V")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("C")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("Z")));
- fpscr->addChild(new QTreeWidgetItem(QStringList("N")));
-
- QTreeWidgetItem* const fpexc = new QTreeWidgetItem(QStringList("FPEXC"));
- fpexc->addChild(new QTreeWidgetItem(QStringList("IOC")));
- fpexc->addChild(new QTreeWidgetItem(QStringList("OFC")));
- fpexc->addChild(new QTreeWidgetItem(QStringList("UFC")));
- fpexc->addChild(new QTreeWidgetItem(QStringList("INV")));
- fpexc->addChild(new QTreeWidgetItem(QStringList(tr("Vector Iteration Count"))));
- fpexc->addChild(new QTreeWidgetItem(QStringList("FP2V")));
- fpexc->addChild(new QTreeWidgetItem(QStringList("EN")));
- fpexc->addChild(new QTreeWidgetItem(QStringList("EX")));
-
- vfp_system_registers->addChild(fpscr);
- vfp_system_registers->addChild(fpexc);
- vfp_system_registers->addChild(new QTreeWidgetItem(QStringList("FPINST")));
- vfp_system_registers->addChild(new QTreeWidgetItem(QStringList("FPINST2")));
-}
-
-void RegistersWidget::UpdateVFPSystemRegisterValues() {
- UNIMPLEMENTED();
-}
diff --git a/src/yuzu/debugger/registers.h b/src/yuzu/debugger/registers.h
deleted file mode 100644
index 55bda5b59..000000000
--- a/src/yuzu/debugger/registers.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <QDockWidget>
-#include "ui_registers.h"
-
-class QTreeWidget;
-class QTreeWidgetItem;
-class EmuThread;
-
-class RegistersWidget : public QDockWidget {
- Q_OBJECT
-
-public:
- explicit RegistersWidget(QWidget* parent = nullptr);
-
-public slots:
- void OnDebugModeEntered();
- void OnDebugModeLeft();
-
- void OnEmulationStarting(EmuThread* emu_thread);
- void OnEmulationStopping();
-
-private:
- void CreateCPSRChildren();
- void UpdateCPSRValues();
-
- void CreateVFPSystemRegisterChildren();
- void UpdateVFPSystemRegisterValues();
-
- Ui::ARMRegisters cpu_regs_ui;
-
- QTreeWidget* tree;
-
- QTreeWidgetItem* core_registers;
- QTreeWidgetItem* vfp_registers;
- QTreeWidgetItem* vfp_system_registers;
- QTreeWidgetItem* cpsr;
-};
diff --git a/src/yuzu/debugger/registers.ui b/src/yuzu/debugger/registers.ui
deleted file mode 100644
index c81ae03f9..000000000
--- a/src/yuzu/debugger/registers.ui
+++ /dev/null
@@ -1,40 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<ui version="4.0">
- <class>ARMRegisters</class>
- <widget class="QDockWidget" name="ARMRegisters">
- <property name="geometry">
- <rect>
- <x>0</x>
- <y>0</y>
- <width>400</width>
- <height>300</height>
- </rect>
- </property>
- <property name="windowTitle">
- <string>ARM Registers</string>
- </property>
- <widget class="QWidget" name="dockWidgetContents">
- <layout class="QVBoxLayout" name="verticalLayout">
- <item>
- <widget class="QTreeWidget" name="treeWidget">
- <property name="alternatingRowColors">
- <bool>true</bool>
- </property>
- <column>
- <property name="text">
- <string>Register</string>
- </property>
- </column>
- <column>
- <property name="text">
- <string>Value</string>
- </property>
- </column>
- </widget>
- </item>
- </layout>
- </widget>
- </widget>
- <resources/>
- <connections/>
-</ui>
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 017bef13c..7101b381e 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -213,6 +213,9 @@ QString WaitTreeThread::GetText() const {
case THREADSTATUS_WAIT_MUTEX:
status = tr("waiting for mutex");
break;
+ case THREADSTATUS_WAIT_ARB:
+ status = tr("waiting for address arbiter");
+ break;
case THREADSTATUS_DORMANT:
status = tr("dormant");
break;
@@ -240,6 +243,7 @@ QColor WaitTreeThread::GetColor() const {
case THREADSTATUS_WAIT_SYNCH_ALL:
case THREADSTATUS_WAIT_SYNCH_ANY:
case THREADSTATUS_WAIT_MUTEX:
+ case THREADSTATUS_WAIT_ARB:
return QColor(Qt::GlobalColor::red);
case THREADSTATUS_DORMANT:
return QColor(Qt::GlobalColor::darkCyan);
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index bbd681eae..55dce6d47 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <QApplication>
+#include <QDir>
#include <QFileInfo>
#include <QHeaderView>
#include <QKeyEvent>
@@ -264,8 +265,17 @@ void GameList::ValidateEntry(const QModelIndex& item) {
if (file_path.isEmpty())
return;
std::string std_file_path(file_path.toStdString());
- if (!FileUtil::Exists(std_file_path) || FileUtil::IsDirectory(std_file_path))
+ if (!FileUtil::Exists(std_file_path))
return;
+ if (FileUtil::IsDirectory(std_file_path)) {
+ QDir dir(std_file_path.c_str());
+ QStringList matching_main = dir.entryList(QStringList("main"), QDir::Files);
+ if (matching_main.size() == 1) {
+ emit GameChosen(dir.path() + DIR_SEP + matching_main[0]);
+ }
+ return;
+ }
+
// Users usually want to run a diffrent game after closing one
search_field->clear();
emit GameChosen(file_path);
@@ -356,13 +366,26 @@ void GameList::LoadInterfaceLayout() {
item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder());
}
-const QStringList GameList::supported_file_extensions = {"nso", "nro"};
+const QStringList GameList::supported_file_extensions = {"nso", "nro", "nca"};
static bool HasSupportedFileExtension(const std::string& file_name) {
QFileInfo file = QFileInfo(file_name.c_str());
return GameList::supported_file_extensions.contains(file.suffix(), Qt::CaseInsensitive);
}
+static bool IsExtractedNCAMain(const std::string& file_name) {
+ return QFileInfo(file_name.c_str()).fileName() == "main";
+}
+
+static QString FormatGameName(const std::string& physical_name) {
+ QFileInfo file_info(physical_name.c_str());
+ if (IsExtractedNCAMain(physical_name)) {
+ return file_info.dir().path();
+ } else {
+ return QString::fromStdString(physical_name);
+ }
+}
+
void GameList::RefreshGameDirectory() {
if (!UISettings::values.gamedir.isEmpty() && current_worker != nullptr) {
NGLOG_INFO(Frontend, "Change detected in the games directory. Reloading game list.");
@@ -380,7 +403,8 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
return false; // Breaks the callback loop.
bool is_dir = FileUtil::IsDirectory(physical_name);
- if (!is_dir && HasSupportedFileExtension(physical_name)) {
+ if (!is_dir &&
+ (HasSupportedFileExtension(physical_name) || IsExtractedNCAMain(physical_name))) {
std::unique_ptr<Loader::AppLoader> loader = Loader::GetLoader(physical_name);
if (!loader)
return true;
@@ -392,7 +416,7 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, unsign
loader->ReadProgramId(program_id);
emit EntryReady({
- new GameListItemPath(QString::fromStdString(physical_name), smdh, program_id),
+ new GameListItemPath(FormatGameName(physical_name), smdh, program_id),
new GameListItem(
QString::fromStdString(Loader::GetFileTypeString(loader->GetFileType()))),
new GameListItemSize(FileUtil::GetSize(physical_name)),
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 3038bd6da..97be548d7 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -13,6 +13,7 @@
#include <QMessageBox>
#include <QtGui>
#include <QtWidgets>
+#include "common/common_paths.h"
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/logging/log.h"
@@ -33,7 +34,6 @@
#include "yuzu/debugger/graphics/graphics_breakpoints.h"
#include "yuzu/debugger/graphics/graphics_surface.h"
#include "yuzu/debugger/profiler.h"
-#include "yuzu/debugger/registers.h"
#include "yuzu/debugger/wait_tree.h"
#include "yuzu/game_list.h"
#include "yuzu/hotkeys.h"
@@ -169,15 +169,6 @@ void GMainWindow::InitializeDebugWidgets() {
debug_menu->addAction(microProfileDialog->toggleViewAction());
#endif
- registersWidget = new RegistersWidget(this);
- addDockWidget(Qt::RightDockWidgetArea, registersWidget);
- registersWidget->hide();
- debug_menu->addAction(registersWidget->toggleViewAction());
- connect(this, &GMainWindow::EmulationStarting, registersWidget,
- &RegistersWidget::OnEmulationStarting);
- connect(this, &GMainWindow::EmulationStopping, registersWidget,
- &RegistersWidget::OnEmulationStopping);
-
graphicsBreakpointsWidget = new GraphicsBreakPointsWidget(debug_context, this);
addDockWidget(Qt::RightDockWidgetArea, graphicsBreakpointsWidget);
graphicsBreakpointsWidget->hide();
@@ -288,6 +279,7 @@ void GMainWindow::ConnectWidgetEvents() {
void GMainWindow::ConnectMenuEvents() {
// File
connect(ui.action_Load_File, &QAction::triggered, this, &GMainWindow::OnMenuLoadFile);
+ connect(ui.action_Load_Folder, &QAction::triggered, this, &GMainWindow::OnMenuLoadFolder);
connect(ui.action_Select_Game_List_Root, &QAction::triggered, this,
&GMainWindow::OnMenuSelectGameListRoot);
connect(ui.action_Exit, &QAction::triggered, this, &QMainWindow::close);
@@ -460,17 +452,12 @@ void GMainWindow::BootGame(const QString& filename) {
connect(render_window, &GRenderWindow::Closed, this, &GMainWindow::OnStopGame);
// BlockingQueuedConnection is important here, it makes sure we've finished refreshing our views
// before the CPU continues
- connect(emu_thread.get(), &EmuThread::DebugModeEntered, registersWidget,
- &RegistersWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection);
connect(emu_thread.get(), &EmuThread::DebugModeEntered, waitTreeWidget,
&WaitTreeWidget::OnDebugModeEntered, Qt::BlockingQueuedConnection);
- connect(emu_thread.get(), &EmuThread::DebugModeLeft, registersWidget,
- &RegistersWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
connect(emu_thread.get(), &EmuThread::DebugModeLeft, waitTreeWidget,
&WaitTreeWidget::OnDebugModeLeft, Qt::BlockingQueuedConnection);
// Update the GUI
- registersWidget->OnDebugModeEntered();
if (ui.action_Single_Window_Mode->isChecked()) {
game_list->hide();
}
@@ -565,6 +552,8 @@ void GMainWindow::OnMenuLoadFile() {
for (const auto& piece : game_list->supported_file_extensions)
extensions += "*." + piece + " ";
+ extensions += "main ";
+
QString file_filter = tr("Switch Executable") + " (" + extensions + ")";
file_filter += ";;" + tr("All Files (*.*)");
@@ -577,6 +566,18 @@ void GMainWindow::OnMenuLoadFile() {
}
}
+void GMainWindow::OnMenuLoadFolder() {
+ QDir dir = QFileDialog::getExistingDirectory(this, tr("Open Extracted ROM Directory"));
+
+ QStringList matching_main = dir.entryList(QStringList("main"), QDir::Files);
+ if (matching_main.size() == 1) {
+ BootGame(dir.path() + DIR_SEP + matching_main[0]);
+ } else {
+ QMessageBox::warning(this, tr("Invalid Directory Selected"),
+ tr("The directory you have selected does not contain a 'main' file."));
+ }
+}
+
void GMainWindow::OnMenuSelectGameListRoot() {
QString dir_path = QFileDialog::getExistingDirectory(this, tr("Select Directory"));
if (!dir_path.isEmpty()) {
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index ac3024d8a..074bba3f9 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -19,7 +19,6 @@ class GraphicsSurfaceWidget;
class GRenderWindow;
class MicroProfileDialog;
class ProfilerWidget;
-class RegistersWidget;
class WaitTreeWidget;
namespace Tegra {
@@ -124,6 +123,7 @@ private slots:
void OnGameListLoadFile(QString game_path);
void OnGameListOpenSaveFolder(u64 program_id);
void OnMenuLoadFile();
+ void OnMenuLoadFolder();
/// Called whenever a user selects the "File->Select Game List Root" menu item
void OnMenuSelectGameListRoot();
void OnMenuRecentFile();
@@ -163,7 +163,6 @@ private:
// Debugger panes
ProfilerWidget* profilerWidget;
MicroProfileDialog* microProfileDialog;
- RegistersWidget* registersWidget;
GraphicsBreakPointsWidget* graphicsBreakpointsWidget;
GraphicsSurfaceWidget* graphicsSurfaceWidget;
WaitTreeWidget* waitTreeWidget;
diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui
index 0fcd93cc2..22c4cad08 100644
--- a/src/yuzu/main.ui
+++ b/src/yuzu/main.ui
@@ -58,6 +58,7 @@
</property>
</widget>
<addaction name="action_Load_File"/>
+ <addaction name="action_Load_Folder"/>
<addaction name="separator"/>
<addaction name="action_Select_Game_List_Root"/>
<addaction name="menu_recent_files"/>
@@ -106,6 +107,11 @@
<string>Load File...</string>
</property>
</action>
+ <action name="action_Load_Folder">
+ <property name="text">
+ <string>Load Folder...</string>
+ </property>
+ </action>
<action name="action_Load_Symbol_Map">
<property name="text">
<string>Load Symbol Map...</string>