summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
m---------externals/Vulkan-Headers0
-rw-r--r--src/core/core.cpp3
-rw-r--r--src/core/hle/kernel/vm_manager.cpp72
-rw-r--r--src/core/hle/kernel/vm_manager.h8
-rw-r--r--src/core/hle/service/acc/acc.cpp124
-rw-r--r--src/core/hle/service/acc/acc.h1
-rw-r--r--src/core/hle/service/acc/acc_su.cpp2
-rw-r--r--src/core/hle/service/acc/profile_manager.cpp11
-rw-r--r--src/core/hle/service/acc/profile_manager.h2
-rw-r--r--src/core/hle/service/am/am.cpp50
-rw-r--r--src/core/hle/service/am/am.h28
-rw-r--r--src/core/hle/service/am/applet_ae.cpp14
-rw-r--r--src/core/hle/service/am/applet_oe.cpp9
-rw-r--r--src/core/hle/service/am/applets/applets.cpp25
-rw-r--r--src/core/hle/service/am/applets/applets.h17
-rw-r--r--src/core/hle/service/am/applets/error.cpp7
-rw-r--r--src/core/hle/service/am/applets/error.h7
-rw-r--r--src/core/hle/service/am/applets/general_backend.cpp13
-rw-r--r--src/core/hle/service/am/applets/general_backend.h12
-rw-r--r--src/core/hle/service/am/applets/profile_select.cpp5
-rw-r--r--src/core/hle/service/am/applets/profile_select.h7
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.cpp5
-rw-r--r--src/core/hle/service/am/applets/software_keyboard.h7
-rw-r--r--src/core/hle/service/am/applets/web_browser.cpp19
-rw-r--r--src/core/hle/service/am/applets/web_browser.h12
-rw-r--r--src/core/loader/nro.cpp9
-rw-r--r--src/core/loader/nro.h1
-rw-r--r--src/video_core/engines/kepler_compute.cpp53
-rw-r--r--src/video_core/engines/kepler_compute.h23
-rw-r--r--src/video_core/engines/maxwell_3d.cpp22
-rw-r--r--src/video_core/engines/maxwell_3d.h12
-rw-r--r--src/video_core/engines/shader_bytecode.h54
-rw-r--r--src/video_core/macro_interpreter.cpp22
-rw-r--r--src/video_core/macro_interpreter.h8
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp141
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h19
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp249
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp19
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp89
-rw-r--r--src/video_core/renderer_opengl/gl_state.h29
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h22
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp15
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp301
-rw-r--r--src/video_core/renderer_vulkan/vk_device.h62
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp98
-rw-r--r--src/video_core/shader/decode/image.cpp104
-rw-r--r--src/video_core/shader/decode/memory.cpp48
-rw-r--r--src/video_core/shader/decode/shift.cpp19
-rw-r--r--src/video_core/shader/decode/warp.cpp47
-rw-r--r--src/video_core/shader/node.h109
-rw-r--r--src/video_core/shader/shader_ir.cpp9
-rw-r--r--src/video_core/shader/shader_ir.h20
-rw-r--r--src/video_core/surface.cpp20
-rw-r--r--src/video_core/surface.h2
-rw-r--r--src/video_core/texture_cache/surface_base.h12
-rw-r--r--src/video_core/texture_cache/surface_params.cpp134
-rw-r--r--src/video_core/texture_cache/surface_params.h9
-rw-r--r--src/video_core/texture_cache/surface_view.cpp2
-rw-r--r--src/video_core/texture_cache/surface_view.h20
-rw-r--r--src/video_core/texture_cache/texture_cache.h21
62 files changed, 1688 insertions, 602 deletions
diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers
-Subproject d05c8df88da98ec1ab3bc600d7f5783b4060895
+Subproject fd568d51ed3d9bc6132e1639d7492453a08fe1b
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 20d64f3b0..3d0978cbf 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -104,7 +104,8 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
return vfs->OpenFile(path, FileSys::Mode::Read);
}
struct System::Impl {
- explicit Impl(System& system) : kernel{system}, cpu_core_manager{system}, reporter{system} {}
+ explicit Impl(System& system)
+ : kernel{system}, cpu_core_manager{system}, applet_manager{system}, reporter{system} {}
Cpu& CurrentCpuCore() {
return cpu_core_manager.GetCurrentCore();
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 40cea1e7c..c7af87073 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -296,12 +296,6 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
}
ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
- const auto end_addr = target + size;
- const auto last_addr = end_addr - 1;
- VAddr cur_addr = target;
-
- ResultCode result = RESULT_SUCCESS;
-
// Check how much memory we've already mapped.
const auto mapped_size_result = SizeOfAllocatedVMAsInRange(target, size);
if (mapped_size_result.Failed()) {
@@ -324,13 +318,16 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
// Keep track of the memory regions we unmap.
std::vector<std::pair<u64, u64>> mapped_regions;
+ ResultCode result = RESULT_SUCCESS;
// Iterate, trying to map memory.
{
- cur_addr = target;
+ const auto end_addr = target + size;
+ const auto last_addr = end_addr - 1;
+ VAddr cur_addr = target;
auto iter = FindVMA(target);
- ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
+ ASSERT(iter != vma_map.end());
while (true) {
const auto& vma = iter->second;
@@ -342,7 +339,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
if (vma.state == MemoryState::Unmapped) {
const auto map_res =
- MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0,
+ MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size), 0,
map_size, MemoryState::Heap, VMAPermission::ReadWrite);
result = map_res.Code();
if (result.IsError()) {
@@ -360,7 +357,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
// Advance to the next block.
cur_addr = vma_end;
iter = FindVMA(cur_addr);
- ASSERT_MSG(iter != vma_map.end(), "MapPhysicalMemory iter != end");
+ ASSERT(iter != vma_map.end());
}
}
@@ -368,7 +365,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
if (result.IsError()) {
for (const auto [unmap_address, unmap_size] : mapped_regions) {
ASSERT_MSG(UnmapRange(unmap_address, unmap_size).IsSuccess(),
- "MapPhysicalMemory un-map on error");
+ "Failed to unmap memory range.");
}
return result;
@@ -381,12 +378,6 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
}
ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
- const auto end_addr = target + size;
- const auto last_addr = end_addr - 1;
- VAddr cur_addr = target;
-
- ResultCode result = RESULT_SUCCESS;
-
// Check how much memory is currently mapped.
const auto mapped_size_result = SizeOfUnmappablePhysicalMemoryInRange(target, size);
if (mapped_size_result.Failed()) {
@@ -401,13 +392,16 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
// Keep track of the memory regions we unmap.
std::vector<std::pair<u64, u64>> unmapped_regions;
+ ResultCode result = RESULT_SUCCESS;
// Try to unmap regions.
{
- cur_addr = target;
+ const auto end_addr = target + size;
+ const auto last_addr = end_addr - 1;
+ VAddr cur_addr = target;
auto iter = FindVMA(target);
- ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
+ ASSERT(iter != vma_map.end());
while (true) {
const auto& vma = iter->second;
@@ -434,7 +428,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
// Advance to the next block.
cur_addr = vma_end;
iter = FindVMA(cur_addr);
- ASSERT_MSG(iter != vma_map.end(), "UnmapPhysicalMemory iter != end");
+ ASSERT(iter != vma_map.end());
}
}
@@ -443,10 +437,12 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
if (result.IsError()) {
for (const auto [map_address, map_size] : unmapped_regions) {
const auto remap_res =
- MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0,
- map_size, MemoryState::Heap, VMAPermission::None);
- ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
+ MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size), 0, map_size,
+ MemoryState::Heap, VMAPermission::None);
+ ASSERT_MSG(remap_res.Succeeded(), "Failed to remap a memory block.");
}
+
+ return result;
}
// Update mapped amount
@@ -757,20 +753,26 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
// Always merge allocated memory blocks, even when they don't share the same backing block.
if (left.type == VMAType::AllocatedMemoryBlock &&
(left.backing_block != right.backing_block || left.offset + left.size != right.offset)) {
+ const auto right_begin = right.backing_block->begin() + right.offset;
+ const auto right_end = right_begin + right.size;
+
// Check if we can save work.
if (left.offset == 0 && left.size == left.backing_block->size()) {
// Fast case: left is an entire backing block.
- left.backing_block->insert(left.backing_block->end(),
- right.backing_block->begin() + right.offset,
- right.backing_block->begin() + right.offset + right.size);
+ left.backing_block->insert(left.backing_block->end(), right_begin, right_end);
} else {
// Slow case: make a new memory block for left and right.
+ const auto left_begin = left.backing_block->begin() + left.offset;
+ const auto left_end = left_begin + left.size;
+ const auto left_size = static_cast<std::size_t>(std::distance(left_begin, left_end));
+ const auto right_size = static_cast<std::size_t>(std::distance(right_begin, right_end));
+
auto new_memory = std::make_shared<PhysicalMemory>();
- new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
- left.backing_block->begin() + left.offset + left.size);
- new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,
- right.backing_block->begin() + right.offset + right.size);
- left.backing_block = new_memory;
+ new_memory->reserve(left_size + right_size);
+ new_memory->insert(new_memory->end(), left_begin, left_end);
+ new_memory->insert(new_memory->end(), right_begin, right_end);
+
+ left.backing_block = std::move(new_memory);
left.offset = 0;
}
@@ -965,7 +967,7 @@ ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
VAddr cur_addr = address;
auto iter = FindVMA(cur_addr);
- ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
+ ASSERT(iter != vma_map.end());
while (true) {
const auto& vma = iter->second;
@@ -986,7 +988,7 @@ ResultVal<std::size_t> VMManager::SizeOfAllocatedVMAsInRange(VAddr address,
// Advance to the next block.
cur_addr = vma_end;
iter = std::next(iter);
- ASSERT_MSG(iter != vma_map.end(), "SizeOfAllocatedVMAsInRange iter != end");
+ ASSERT(iter != vma_map.end());
}
return MakeResult(mapped_size);
@@ -1000,7 +1002,7 @@ ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr ad
VAddr cur_addr = address;
auto iter = FindVMA(cur_addr);
- ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
+ ASSERT(iter != vma_map.end());
while (true) {
const auto& vma = iter->second;
@@ -1029,7 +1031,7 @@ ResultVal<std::size_t> VMManager::SizeOfUnmappablePhysicalMemoryInRange(VAddr ad
// Advance to the next block.
cur_addr = vma_end;
iter = std::next(iter);
- ASSERT_MSG(iter != vma_map.end(), "SizeOfUnmappablePhysicalMemoryInRange iter != end");
+ ASSERT(iter != vma_map.end());
}
return MakeResult(mapped_size);
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index b18cde619..850a7ebc3 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -454,8 +454,8 @@ public:
/// Maps memory at a given address.
///
- /// @param addr The virtual address to map memory at.
- /// @param size The amount of memory to map.
+ /// @param target The virtual address to map memory at.
+ /// @param size The amount of memory to map.
///
/// @note The destination address must lie within the Map region.
///
@@ -468,8 +468,8 @@ public:
/// Unmaps memory at a given address.
///
- /// @param addr The virtual address to unmap memory at.
- /// @param size The amount of memory to unmap.
+ /// @param target The virtual address to unmap memory at.
+ /// @param size The amount of memory to unmap.
///
/// @note The destination address must lie within the Map region.
///
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp
index c01ee3eda..a7c55e116 100644
--- a/src/core/hle/service/acc/acc.cpp
+++ b/src/core/hle/service/acc/acc.cpp
@@ -31,6 +31,9 @@
namespace Service::Account {
+constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 30};
+constexpr ResultCode ERR_FAILED_SAVE_DATA{ErrorModule::Account, 100};
+
static std::string GetImagePath(Common::UUID uuid) {
return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
"/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg";
@@ -41,20 +44,31 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) {
return static_cast<u32>(std::min(size, max_jpeg_image_size));
}
-class IProfile final : public ServiceFramework<IProfile> {
+class IProfileCommon : public ServiceFramework<IProfileCommon> {
public:
- explicit IProfile(Common::UUID user_id, ProfileManager& profile_manager)
- : ServiceFramework("IProfile"), profile_manager(profile_manager), user_id(user_id) {
+ explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id,
+ ProfileManager& profile_manager)
+ : ServiceFramework(name), profile_manager(profile_manager), user_id(user_id) {
static const FunctionInfo functions[] = {
- {0, &IProfile::Get, "Get"},
- {1, &IProfile::GetBase, "GetBase"},
- {10, &IProfile::GetImageSize, "GetImageSize"},
- {11, &IProfile::LoadImage, "LoadImage"},
+ {0, &IProfileCommon::Get, "Get"},
+ {1, &IProfileCommon::GetBase, "GetBase"},
+ {10, &IProfileCommon::GetImageSize, "GetImageSize"},
+ {11, &IProfileCommon::LoadImage, "LoadImage"},
};
+
RegisterHandlers(functions);
+
+ if (editor_commands) {
+ static const FunctionInfo editor_functions[] = {
+ {100, &IProfileCommon::Store, "Store"},
+ {101, &IProfileCommon::StoreWithImage, "StoreWithImage"},
+ };
+
+ RegisterHandlers(editor_functions);
+ }
}
-private:
+protected:
void Get(Kernel::HLERequestContext& ctx) {
LOG_INFO(Service_ACC, "called user_id={}", user_id.Format());
ProfileBase profile_base{};
@@ -127,10 +141,91 @@ private:
}
}
- const ProfileManager& profile_manager;
+ void Store(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto base = rp.PopRaw<ProfileBase>();
+
+ const auto user_data = ctx.ReadBuffer();
+
+ LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}",
+ Common::StringFromFixedZeroTerminatedBuffer(
+ reinterpret_cast<const char*>(base.username.data()), base.username.size()),
+ base.timestamp, base.user_uuid.Format());
+
+ if (user_data.size() < sizeof(ProfileData)) {
+ LOG_ERROR(Service_ACC, "ProfileData buffer too small!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERR_INVALID_BUFFER_SIZE);
+ return;
+ }
+
+ ProfileData data;
+ std::memcpy(&data, user_data.data(), sizeof(ProfileData));
+
+ if (!profile_manager.SetProfileBaseAndData(user_id, base, data)) {
+ LOG_ERROR(Service_ACC, "Failed to update profile data and base!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERR_FAILED_SAVE_DATA);
+ return;
+ }
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+
+ void StoreWithImage(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ const auto base = rp.PopRaw<ProfileBase>();
+
+ const auto user_data = ctx.ReadBuffer();
+ const auto image_data = ctx.ReadBuffer(1);
+
+ LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}",
+ Common::StringFromFixedZeroTerminatedBuffer(
+ reinterpret_cast<const char*>(base.username.data()), base.username.size()),
+ base.timestamp, base.user_uuid.Format());
+
+ if (user_data.size() < sizeof(ProfileData)) {
+ LOG_ERROR(Service_ACC, "ProfileData buffer too small!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERR_INVALID_BUFFER_SIZE);
+ return;
+ }
+
+ ProfileData data;
+ std::memcpy(&data, user_data.data(), sizeof(ProfileData));
+
+ FileUtil::IOFile image(GetImagePath(user_id), "wb");
+
+ if (!image.IsOpen() || !image.Resize(image_data.size()) ||
+ image.WriteBytes(image_data.data(), image_data.size()) != image_data.size() ||
+ !profile_manager.SetProfileBaseAndData(user_id, base, data)) {
+ LOG_ERROR(Service_ACC, "Failed to update profile data, base, and image!");
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(ERR_FAILED_SAVE_DATA);
+ return;
+ }
+
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ }
+
+ ProfileManager& profile_manager;
Common::UUID user_id; ///< The user id this profile refers to.
};
+class IProfile final : public IProfileCommon {
+public:
+ IProfile(Common::UUID user_id, ProfileManager& profile_manager)
+ : IProfileCommon("IProfile", false, user_id, profile_manager) {}
+};
+
+class IProfileEditor final : public IProfileCommon {
+public:
+ IProfileEditor(Common::UUID user_id, ProfileManager& profile_manager)
+ : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {}
+};
+
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
public:
IManagerForApplication() : ServiceFramework("IManagerForApplication") {
@@ -322,6 +417,17 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx
rb.Push(is_locked);
}
+void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) {
+ IPC::RequestParser rp{ctx};
+ Common::UUID user_id = rp.PopRaw<Common::UUID>();
+
+ LOG_DEBUG(Service_ACC, "called, user_id={}", user_id.Format());
+
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+ rb.Push(RESULT_SUCCESS);
+ rb.PushIpcInterface<IProfileEditor>(user_id, *profile_manager);
+}
+
void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
// A u8 is passed into this function which we can safely ignore. It's to determine if we have
diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h
index f651773b7..7a7dc9ec6 100644
--- a/src/core/hle/service/acc/acc.h
+++ b/src/core/hle/service/acc/acc.h
@@ -32,6 +32,7 @@ public:
void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx);
void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx);
void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx);
+ void GetProfileEditor(Kernel::HLERequestContext& ctx);
private:
ResultCode InitializeApplicationInfoBase(u64 process_id);
diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp
index 1b7ec3ed0..0d1663657 100644
--- a/src/core/hle/service/acc/acc_su.cpp
+++ b/src/core/hle/service/acc/acc_su.cpp
@@ -41,7 +41,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p
{202, nullptr, "CancelUserRegistration"},
{203, nullptr, "DeleteUser"},
{204, nullptr, "SetUserPosition"},
- {205, nullptr, "GetProfileEditor"},
+ {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"},
{206, nullptr, "CompleteUserRegistrationForcibly"},
{210, nullptr, "CreateFloatingRegistrationRequest"},
{230, nullptr, "AuthenticateServiceAsync"},
diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp
index 49aa5908b..8f9986326 100644
--- a/src/core/hle/service/acc/profile_manager.cpp
+++ b/src/core/hle/service/acc/profile_manager.cpp
@@ -305,6 +305,17 @@ bool ProfileManager::SetProfileBase(UUID uuid, const ProfileBase& profile_new) {
return true;
}
+bool ProfileManager::SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new,
+ const ProfileData& data_new) {
+ const auto index = GetUserIndex(uuid);
+ if (index.has_value() && SetProfileBase(uuid, profile_new)) {
+ profiles[*index].data = data_new;
+ return true;
+ }
+
+ return false;
+}
+
void ProfileManager::ParseUserSaveFile() {
FileUtil::IOFile save(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) +
ACC_SAVE_AVATORS_BASE_PATH + "profiles.dat",
diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h
index fd7abb541..5a6d28925 100644
--- a/src/core/hle/service/acc/profile_manager.h
+++ b/src/core/hle/service/acc/profile_manager.h
@@ -91,6 +91,8 @@ public:
bool RemoveUser(Common::UUID uuid);
bool SetProfileBase(Common::UUID uuid, const ProfileBase& profile_new);
+ bool SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new,
+ const ProfileData& data_new);
private:
void ParseUserSaveFile();
diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp
index 111633ba3..aa2c83937 100644
--- a/src/core/hle/service/am/am.cpp
+++ b/src/core/hle/service/am/am.cpp
@@ -56,7 +56,8 @@ struct LaunchParameters {
};
static_assert(sizeof(LaunchParameters) == 0x88);
-IWindowController::IWindowController() : ServiceFramework("IWindowController") {
+IWindowController::IWindowController(Core::System& system_)
+ : ServiceFramework("IWindowController"), system{system_} {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "CreateWindow"},
@@ -75,7 +76,7 @@ IWindowController::IWindowController() : ServiceFramework("IWindowController") {
IWindowController::~IWindowController() = default;
void IWindowController::GetAppletResourceUserId(Kernel::HLERequestContext& ctx) {
- const u64 process_id = Core::System::GetInstance().Kernel().CurrentProcess()->GetProcessID();
+ const u64 process_id = system.CurrentProcess()->GetProcessID();
LOG_DEBUG(Service_AM, "called. Process ID=0x{:016X}", process_id);
@@ -231,8 +232,9 @@ IDebugFunctions::IDebugFunctions() : ServiceFramework{"IDebugFunctions"} {
IDebugFunctions::~IDebugFunctions() = default;
-ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger)
- : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger)) {
+ISelfController::ISelfController(Core::System& system_,
+ std::shared_ptr<NVFlinger::NVFlinger> nvflinger_)
+ : ServiceFramework("ISelfController"), nvflinger(std::move(nvflinger_)) {
// clang-format off
static const FunctionInfo functions[] = {
{0, nullptr, "Exit"},
@@ -280,7 +282,7 @@ ISelfController::ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger
RegisterHandlers(functions);
- auto& kernel = Core::System::GetInstance().Kernel();
+ auto& kernel = system_.Kernel();
launchable_event = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
"ISelfController:LaunchableEvent");
@@ -501,8 +503,7 @@ void ISelfController::GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequest
rb.PushCopyObjects(accumulated_suspended_tick_changed_event.readable);
}
-AppletMessageQueue::AppletMessageQueue() {
- auto& kernel = Core::System::GetInstance().Kernel();
+AppletMessageQueue::AppletMessageQueue(Kernel::KernelCore& kernel) {
on_new_message = Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual,
"AMMessageQueue:OnMessageRecieved");
on_operation_mode_changed = Kernel::WritableEvent::CreateEventPair(
@@ -937,9 +938,8 @@ void IStorageAccessor::Read(Kernel::HLERequestContext& ctx) {
rb.Push(RESULT_SUCCESS);
}
-ILibraryAppletCreator::ILibraryAppletCreator(u64 current_process_title_id)
- : ServiceFramework("ILibraryAppletCreator"),
- current_process_title_id(current_process_title_id) {
+ILibraryAppletCreator::ILibraryAppletCreator(Core::System& system_)
+ : ServiceFramework("ILibraryAppletCreator"), system{system_} {
static const FunctionInfo functions[] = {
{0, &ILibraryAppletCreator::CreateLibraryApplet, "CreateLibraryApplet"},
{1, nullptr, "TerminateAllLibraryApplets"},
@@ -961,8 +961,8 @@ void ILibraryAppletCreator::CreateLibraryApplet(Kernel::HLERequestContext& ctx)
LOG_DEBUG(Service_AM, "called with applet_id={:08X}, applet_mode={:08X}",
static_cast<u32>(applet_id), applet_mode);
- const auto& applet_manager{Core::System::GetInstance().GetAppletManager()};
- const auto applet = applet_manager.GetApplet(applet_id, current_process_title_id);
+ const auto& applet_manager{system.GetAppletManager()};
+ const auto applet = applet_manager.GetApplet(applet_id);
if (applet == nullptr) {
LOG_ERROR(Service_AM, "Applet doesn't exist! applet_id={}", static_cast<u32>(applet_id));
@@ -999,8 +999,7 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
const auto handle{rp.Pop<Kernel::Handle>()};
const auto transfer_mem =
- Core::System::GetInstance().CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(
- handle);
+ system.CurrentProcess()->GetHandleTable().Get<Kernel::TransferMemory>(handle);
if (transfer_mem == nullptr) {
LOG_ERROR(Service_AM, "shared_mem is a nullpr for handle={:08X}", handle);
@@ -1018,7 +1017,8 @@ void ILibraryAppletCreator::CreateTransferMemoryStorage(Kernel::HLERequestContex
rb.PushIpcInterface(std::make_shared<IStorage>(std::move(memory)));
}
-IApplicationFunctions::IApplicationFunctions() : ServiceFramework("IApplicationFunctions") {
+IApplicationFunctions::IApplicationFunctions(Core::System& system_)
+ : ServiceFramework("IApplicationFunctions"), system{system_} {
// clang-format off
static const FunctionInfo functions[] = {
{1, &IApplicationFunctions::PopLaunchParameter, "PopLaunchParameter"},
@@ -1180,7 +1180,7 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) {
// Get supported languages from NACP, if possible
// Default to 0 (all languages supported)
u32 supported_languages = 0;
- FileSys::PatchManager pm{Core::System::GetInstance().CurrentProcess()->GetTitleID()};
+ FileSys::PatchManager pm{system.CurrentProcess()->GetTitleID()};
const auto res = pm.GetControlMetadata();
if (res.first != nullptr) {
@@ -1188,8 +1188,8 @@ void IApplicationFunctions::GetDesiredLanguage(Kernel::HLERequestContext& ctx) {
}
// Call IApplicationManagerInterface implementation.
- auto& service_manager = Core::System::GetInstance().ServiceManager();
- auto ns_am2 = service_manager.GetService<Service::NS::NS>("ns:am2");
+ auto& service_manager = system.ServiceManager();
+ auto ns_am2 = service_manager.GetService<NS::NS>("ns:am2");
auto app_man = ns_am2->GetApplicationManagerInterface();
// Get desired application language
@@ -1261,8 +1261,8 @@ void IApplicationFunctions::ExtendSaveData(Kernel::HLERequestContext& ctx) {
"new_journal={:016X}",
static_cast<u8>(type), user_id[1], user_id[0], new_normal_size, new_journal_size);
- FileSystem::WriteSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id,
- {new_normal_size, new_journal_size});
+ const auto title_id = system.CurrentProcess()->GetTitleID();
+ FileSystem::WriteSaveDataSize(type, title_id, user_id, {new_normal_size, new_journal_size});
IPC::ResponseBuilder rb{ctx, 4};
rb.Push(RESULT_SUCCESS);
@@ -1281,8 +1281,8 @@ void IApplicationFunctions::GetSaveDataSize(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_AM, "called with type={:02X}, user_id={:016X}{:016X}", static_cast<u8>(type),
user_id[1], user_id[0]);
- const auto size =
- FileSystem::ReadSaveDataSize(type, Core::CurrentProcess()->GetTitleID(), user_id);
+ const auto title_id = system.CurrentProcess()->GetTitleID();
+ const auto size = FileSystem::ReadSaveDataSize(type, title_id, user_id);
IPC::ResponseBuilder rb{ctx, 6};
rb.Push(RESULT_SUCCESS);
@@ -1300,9 +1300,9 @@ void IApplicationFunctions::GetGpuErrorDetectedSystemEvent(Kernel::HLERequestCon
void InstallInterfaces(SM::ServiceManager& service_manager,
std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system) {
- auto message_queue = std::make_shared<AppletMessageQueue>();
- message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged); // Needed on
- // game boot
+ auto message_queue = std::make_shared<AppletMessageQueue>(system.Kernel());
+ // Needed on game boot
+ message_queue->PushMessage(AppletMessageQueue::AppletMessage::FocusStateChanged);
std::make_shared<AppletAE>(nvflinger, message_queue, system)->InstallAsService(service_manager);
std::make_shared<AppletOE>(nvflinger, message_queue, system)->InstallAsService(service_manager);
diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h
index cbc9da7b6..28f870302 100644
--- a/src/core/hle/service/am/am.h
+++ b/src/core/hle/service/am/am.h
@@ -10,12 +10,15 @@
#include "core/hle/kernel/writable_event.h"
#include "core/hle/service/service.h"
-namespace Service {
-namespace NVFlinger {
+namespace Kernel {
+class KernelCore;
+}
+
+namespace Service::NVFlinger {
class NVFlinger;
}
-namespace AM {
+namespace Service::AM {
enum SystemLanguage {
Japanese = 0,
@@ -47,7 +50,7 @@ public:
PerformanceModeChanged = 31,
};
- AppletMessageQueue();
+ explicit AppletMessageQueue(Kernel::KernelCore& kernel);
~AppletMessageQueue();
const Kernel::SharedPtr<Kernel::ReadableEvent>& GetMesssageRecieveEvent() const;
@@ -65,12 +68,14 @@ private:
class IWindowController final : public ServiceFramework<IWindowController> {
public:
- IWindowController();
+ explicit IWindowController(Core::System& system_);
~IWindowController() override;
private:
void GetAppletResourceUserId(Kernel::HLERequestContext& ctx);
void AcquireForegroundRights(Kernel::HLERequestContext& ctx);
+
+ Core::System& system;
};
class IAudioController final : public ServiceFramework<IAudioController> {
@@ -113,7 +118,8 @@ public:
class ISelfController final : public ServiceFramework<ISelfController> {
public:
- explicit ISelfController(std::shared_ptr<NVFlinger::NVFlinger> nvflinger);
+ explicit ISelfController(Core::System& system_,
+ std::shared_ptr<NVFlinger::NVFlinger> nvflinger_);
~ISelfController() override;
private:
@@ -208,7 +214,7 @@ private:
class ILibraryAppletCreator final : public ServiceFramework<ILibraryAppletCreator> {
public:
- ILibraryAppletCreator(u64 current_process_title_id);
+ explicit ILibraryAppletCreator(Core::System& system_);
~ILibraryAppletCreator() override;
private:
@@ -216,12 +222,12 @@ private:
void CreateStorage(Kernel::HLERequestContext& ctx);
void CreateTransferMemoryStorage(Kernel::HLERequestContext& ctx);
- u64 current_process_title_id;
+ Core::System& system;
};
class IApplicationFunctions final : public ServiceFramework<IApplicationFunctions> {
public:
- IApplicationFunctions();
+ explicit IApplicationFunctions(Core::System& system_);
~IApplicationFunctions() override;
private:
@@ -245,6 +251,7 @@ private:
void GetGpuErrorDetectedSystemEvent(Kernel::HLERequestContext& ctx);
Kernel::EventPair gpu_error_detected_event;
+ Core::System& system;
};
class IHomeMenuFunctions final : public ServiceFramework<IHomeMenuFunctions> {
@@ -278,5 +285,4 @@ public:
void InstallInterfaces(SM::ServiceManager& service_manager,
std::shared_ptr<NVFlinger::NVFlinger> nvflinger, Core::System& system);
-} // namespace AM
-} // namespace Service
+} // namespace Service::AM
diff --git a/src/core/hle/service/am/applet_ae.cpp b/src/core/hle/service/am/applet_ae.cpp
index a34368c8b..e454b77d8 100644
--- a/src/core/hle/service/am/applet_ae.cpp
+++ b/src/core/hle/service/am/applet_ae.cpp
@@ -50,7 +50,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<ISelfController>(nvflinger);
+ rb.PushIpcInterface<ISelfController>(system, nvflinger);
}
void GetWindowController(Kernel::HLERequestContext& ctx) {
@@ -58,7 +58,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<IWindowController>();
+ rb.PushIpcInterface<IWindowController>(system);
}
void GetAudioController(Kernel::HLERequestContext& ctx) {
@@ -98,7 +98,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID());
+ rb.PushIpcInterface<ILibraryAppletCreator>(system);
}
void GetApplicationFunctions(Kernel::HLERequestContext& ctx) {
@@ -106,7 +106,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<IApplicationFunctions>();
+ rb.PushIpcInterface<IApplicationFunctions>(system);
}
std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
@@ -154,7 +154,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<ISelfController>(nvflinger);
+ rb.PushIpcInterface<ISelfController>(system, nvflinger);
}
void GetWindowController(Kernel::HLERequestContext& ctx) {
@@ -162,7 +162,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<IWindowController>();
+ rb.PushIpcInterface<IWindowController>(system);
}
void GetAudioController(Kernel::HLERequestContext& ctx) {
@@ -194,7 +194,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID());
+ rb.PushIpcInterface<ILibraryAppletCreator>(system);
}
void GetHomeMenuFunctions(Kernel::HLERequestContext& ctx) {
diff --git a/src/core/hle/service/am/applet_oe.cpp b/src/core/hle/service/am/applet_oe.cpp
index 5d53ef113..a2ffaa440 100644
--- a/src/core/hle/service/am/applet_oe.cpp
+++ b/src/core/hle/service/am/applet_oe.cpp
@@ -4,7 +4,6 @@
#include "common/logging/log.h"
#include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/process.h"
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applet_oe.h"
#include "core/hle/service/nvflinger/nvflinger.h"
@@ -64,7 +63,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<IWindowController>();
+ rb.PushIpcInterface<IWindowController>(system);
}
void GetSelfController(Kernel::HLERequestContext& ctx) {
@@ -72,7 +71,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<ISelfController>(nvflinger);
+ rb.PushIpcInterface<ISelfController>(system, nvflinger);
}
void GetCommonStateGetter(Kernel::HLERequestContext& ctx) {
@@ -88,7 +87,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<ILibraryAppletCreator>(system.CurrentProcess()->GetTitleID());
+ rb.PushIpcInterface<ILibraryAppletCreator>(system);
}
void GetApplicationFunctions(Kernel::HLERequestContext& ctx) {
@@ -96,7 +95,7 @@ private:
IPC::ResponseBuilder rb{ctx, 2, 0, 1};
rb.Push(RESULT_SUCCESS);
- rb.PushIpcInterface<IApplicationFunctions>();
+ rb.PushIpcInterface<IApplicationFunctions>(system);
}
std::shared_ptr<NVFlinger::NVFlinger> nvflinger;
diff --git a/src/core/hle/service/am/applets/applets.cpp b/src/core/hle/service/am/applets/applets.cpp
index 6bdba2468..d2e35362f 100644
--- a/src/core/hle/service/am/applets/applets.cpp
+++ b/src/core/hle/service/am/applets/applets.cpp
@@ -23,8 +23,7 @@
namespace Service::AM::Applets {
-AppletDataBroker::AppletDataBroker() {
- auto& kernel = Core::System::GetInstance().Kernel();
+AppletDataBroker::AppletDataBroker(Kernel::KernelCore& kernel) {
state_changed_event = Kernel::WritableEvent::CreateEventPair(
kernel, Kernel::ResetType::Manual, "ILibraryAppletAccessor:StateChangedEvent");
pop_out_data_event = Kernel::WritableEvent::CreateEventPair(
@@ -121,7 +120,7 @@ Kernel::SharedPtr<Kernel::ReadableEvent> AppletDataBroker::GetStateChangedEvent(
return state_changed_event.readable;
}
-Applet::Applet() = default;
+Applet::Applet(Kernel::KernelCore& kernel_) : broker{kernel_} {}
Applet::~Applet() = default;
@@ -154,7 +153,7 @@ AppletFrontendSet::AppletFrontendSet(AppletFrontendSet&&) noexcept = default;
AppletFrontendSet& AppletFrontendSet::operator=(AppletFrontendSet&&) noexcept = default;
-AppletManager::AppletManager() = default;
+AppletManager::AppletManager(Core::System& system_) : system{system_} {}
AppletManager::~AppletManager() = default;
@@ -216,28 +215,28 @@ void AppletManager::ClearAll() {
frontend = {};
}
-std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id, u64 current_process_title_id) const {
+std::shared_ptr<Applet> AppletManager::GetApplet(AppletId id) const {
switch (id) {
case AppletId::Auth:
- return std::make_shared<Auth>(*frontend.parental_controls);
+ return std::make_shared<Auth>(system, *frontend.parental_controls);
case AppletId::Error:
- return std::make_shared<Error>(*frontend.error);
+ return std::make_shared<Error>(system, *frontend.error);
case AppletId::ProfileSelect:
- return std::make_shared<ProfileSelect>(*frontend.profile_select);
+ return std::make_shared<ProfileSelect>(system, *frontend.profile_select);
case AppletId::SoftwareKeyboard:
- return std::make_shared<SoftwareKeyboard>(*frontend.software_keyboard);
+ return std::make_shared<SoftwareKeyboard>(system, *frontend.software_keyboard);
case AppletId::PhotoViewer:
- return std::make_shared<PhotoViewer>(*frontend.photo_viewer);
+ return std::make_shared<PhotoViewer>(system, *frontend.photo_viewer);
case AppletId::LibAppletShop:
- return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id,
+ return std::make_shared<WebBrowser>(system, *frontend.web_browser,
frontend.e_commerce.get());
case AppletId::LibAppletOff:
- return std::make_shared<WebBrowser>(*frontend.web_browser, current_process_title_id);
+ return std::make_shared<WebBrowser>(system, *frontend.web_browser);
default:
UNIMPLEMENTED_MSG(
"No backend implementation exists for applet_id={:02X}! Falling back to stub applet.",
static_cast<u8>(id));
- return std::make_shared<StubApplet>(id);
+ return std::make_shared<StubApplet>(system, id);
}
}
diff --git a/src/core/hle/service/am/applets/applets.h b/src/core/hle/service/am/applets/applets.h
index adc973dad..764c3418c 100644
--- a/src/core/hle/service/am/applets/applets.h
+++ b/src/core/hle/service/am/applets/applets.h
@@ -12,6 +12,10 @@
union ResultCode;
+namespace Core {
+class System;
+}
+
namespace Core::Frontend {
class ECommerceApplet;
class ErrorApplet;
@@ -22,6 +26,10 @@ class SoftwareKeyboardApplet;
class WebBrowserApplet;
} // namespace Core::Frontend
+namespace Kernel {
+class KernelCore;
+}
+
namespace Service::AM {
class IStorage;
@@ -53,7 +61,7 @@ enum class AppletId : u32 {
class AppletDataBroker final {
public:
- AppletDataBroker();
+ explicit AppletDataBroker(Kernel::KernelCore& kernel_);
~AppletDataBroker();
struct RawChannelData {
@@ -108,7 +116,7 @@ private:
class Applet {
public:
- Applet();
+ explicit Applet(Kernel::KernelCore& kernel_);
virtual ~Applet();
virtual void Initialize();
@@ -179,7 +187,7 @@ struct AppletFrontendSet {
class AppletManager {
public:
- AppletManager();
+ explicit AppletManager(Core::System& system_);
~AppletManager();
void SetAppletFrontendSet(AppletFrontendSet set);
@@ -187,10 +195,11 @@ public:
void SetDefaultAppletsIfMissing();
void ClearAll();
- std::shared_ptr<Applet> GetApplet(AppletId id, u64 current_process_title_id) const;
+ std::shared_ptr<Applet> GetApplet(AppletId id) const;
private:
AppletFrontendSet frontend;
+ Core::System& system;
};
} // namespace Applets
diff --git a/src/core/hle/service/am/applets/error.cpp b/src/core/hle/service/am/applets/error.cpp
index af3a900f8..a7db26725 100644
--- a/src/core/hle/service/am/applets/error.cpp
+++ b/src/core/hle/service/am/applets/error.cpp
@@ -85,7 +85,8 @@ ResultCode Decode64BitError(u64 error) {
} // Anonymous namespace
-Error::Error(const Core::Frontend::ErrorApplet& frontend) : frontend(frontend) {}
+Error::Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_)
+ : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {}
Error::~Error() = default;
@@ -145,8 +146,8 @@ void Error::Execute() {
}
const auto callback = [this] { DisplayCompleted(); };
- const auto title_id = Core::CurrentProcess()->GetTitleID();
- const auto& reporter{Core::System::GetInstance().GetReporter()};
+ const auto title_id = system.CurrentProcess()->GetTitleID();
+ const auto& reporter{system.GetReporter()};
switch (mode) {
case ErrorAppletMode::ShowError:
diff --git a/src/core/hle/service/am/applets/error.h b/src/core/hle/service/am/applets/error.h
index a3590d181..a105cdb0c 100644
--- a/src/core/hle/service/am/applets/error.h
+++ b/src/core/hle/service/am/applets/error.h
@@ -7,6 +7,10 @@
#include "core/hle/result.h"
#include "core/hle/service/am/applets/applets.h"
+namespace Core {
+class System;
+}
+
namespace Service::AM::Applets {
enum class ErrorAppletMode : u8 {
@@ -21,7 +25,7 @@ enum class ErrorAppletMode : u8 {
class Error final : public Applet {
public:
- explicit Error(const Core::Frontend::ErrorApplet& frontend);
+ explicit Error(Core::System& system_, const Core::Frontend::ErrorApplet& frontend_);
~Error() override;
void Initialize() override;
@@ -42,6 +46,7 @@ private:
std::unique_ptr<ErrorArguments> args;
bool complete = false;
+ Core::System& system;
};
} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/general_backend.cpp b/src/core/hle/service/am/applets/general_backend.cpp
index e0def8dff..328438a1d 100644
--- a/src/core/hle/service/am/applets/general_backend.cpp
+++ b/src/core/hle/service/am/applets/general_backend.cpp
@@ -37,7 +37,8 @@ static void LogCurrentStorage(AppletDataBroker& broker, std::string_view prefix)
}
}
-Auth::Auth(Core::Frontend::ParentalControlsApplet& frontend) : frontend(frontend) {}
+Auth::Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_)
+ : Applet{system_.Kernel()}, frontend(frontend_) {}
Auth::~Auth() = default;
@@ -151,7 +152,8 @@ void Auth::AuthFinished(bool successful) {
broker.SignalStateChanged();
}
-PhotoViewer::PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend) : frontend(frontend) {}
+PhotoViewer::PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_)
+ : Applet{system_.Kernel()}, frontend(frontend_), system{system_} {}
PhotoViewer::~PhotoViewer() = default;
@@ -185,7 +187,7 @@ void PhotoViewer::Execute() {
const auto callback = [this] { ViewFinished(); };
switch (mode) {
case PhotoViewerAppletMode::CurrentApp:
- frontend.ShowPhotosForApplication(Core::CurrentProcess()->GetTitleID(), callback);
+ frontend.ShowPhotosForApplication(system.CurrentProcess()->GetTitleID(), callback);
break;
case PhotoViewerAppletMode::AllApps:
frontend.ShowAllPhotos(callback);
@@ -200,7 +202,8 @@ void PhotoViewer::ViewFinished() {
broker.SignalStateChanged();
}
-StubApplet::StubApplet(AppletId id) : id(id) {}
+StubApplet::StubApplet(Core::System& system_, AppletId id_)
+ : Applet{system_.Kernel()}, id(id_), system{system_} {}
StubApplet::~StubApplet() = default;
@@ -209,7 +212,7 @@ void StubApplet::Initialize() {
Applet::Initialize();
const auto data = broker.PeekDataToAppletForDebug();
- Core::System::GetInstance().GetReporter().SaveUnimplementedAppletReport(
+ system.GetReporter().SaveUnimplementedAppletReport(
static_cast<u32>(id), common_args.arguments_version, common_args.library_version,
common_args.theme_color, common_args.play_startup_sound, common_args.system_tick,
data.normal, data.interactive);
diff --git a/src/core/hle/service/am/applets/general_backend.h b/src/core/hle/service/am/applets/general_backend.h
index 0da252044..cfa2df369 100644
--- a/src/core/hle/service/am/applets/general_backend.h
+++ b/src/core/hle/service/am/applets/general_backend.h
@@ -6,6 +6,10 @@
#include "core/hle/service/am/applets/applets.h"
+namespace Core {
+class System;
+}
+
namespace Service::AM::Applets {
enum class AuthAppletType : u32 {
@@ -16,7 +20,7 @@ enum class AuthAppletType : u32 {
class Auth final : public Applet {
public:
- explicit Auth(Core::Frontend::ParentalControlsApplet& frontend);
+ explicit Auth(Core::System& system_, Core::Frontend::ParentalControlsApplet& frontend_);
~Auth() override;
void Initialize() override;
@@ -45,7 +49,7 @@ enum class PhotoViewerAppletMode : u8 {
class PhotoViewer final : public Applet {
public:
- explicit PhotoViewer(const Core::Frontend::PhotoViewerApplet& frontend);
+ explicit PhotoViewer(Core::System& system_, const Core::Frontend::PhotoViewerApplet& frontend_);
~PhotoViewer() override;
void Initialize() override;
@@ -60,11 +64,12 @@ private:
const Core::Frontend::PhotoViewerApplet& frontend;
bool complete = false;
PhotoViewerAppletMode mode = PhotoViewerAppletMode::CurrentApp;
+ Core::System& system;
};
class StubApplet final : public Applet {
public:
- explicit StubApplet(AppletId id);
+ explicit StubApplet(Core::System& system_, AppletId id_);
~StubApplet() override;
void Initialize() override;
@@ -76,6 +81,7 @@ public:
private:
AppletId id;
+ Core::System& system;
};
} // namespace Service::AM::Applets
diff --git a/src/core/hle/service/am/applets/profile_select.cpp b/src/core/hle/service/am/applets/profile_select.cpp
index 57b5419e8..3eba696ca 100644
--- a/src/core/hle/service/am/applets/profile_select.cpp
+++ b/src/core/hle/service/am/applets/profile_select.cpp
@@ -15,8 +15,9 @@ namespace Service::AM::Applets {
constexpr ResultCode ERR_USER_CANCELLED_SELECTION{ErrorModule::Account, 1};
-ProfileSelect::ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend)
- : frontend(frontend) {}
+ProfileSelect::ProfileSelect(Core::System& system_,
+ const Core::Frontend::ProfileSelectApplet& frontend_)
+ : Applet{system_.Kernel()}, frontend(frontend_) {}
ProfileSelect::~ProfileSelect() = default;
diff --git a/src/core/hle/service/am/applets/profile_select.h b/src/core/hle/service/am/applets/profile_select.h
index 563cd744a..16364ead7 100644
--- a/src/core/hle/service/am/applets/profile_select.h
+++ b/src/core/hle/service/am/applets/profile_select.h
@@ -11,6 +11,10 @@
#include "core/hle/result.h"
#include "core/hle/service/am/applets/applets.h"
+namespace Core {
+class System;
+}
+
namespace Service::AM::Applets {
struct UserSelectionConfig {
@@ -29,7 +33,8 @@ static_assert(sizeof(UserSelectionOutput) == 0x18, "UserSelectionOutput has inco
class ProfileSelect final : public Applet {
public:
- explicit ProfileSelect(const Core::Frontend::ProfileSelectApplet& frontend);
+ explicit ProfileSelect(Core::System& system_,
+ const Core::Frontend::ProfileSelectApplet& frontend_);
~ProfileSelect() override;
void Initialize() override;
diff --git a/src/core/hle/service/am/applets/software_keyboard.cpp b/src/core/hle/service/am/applets/software_keyboard.cpp
index e197990f7..748559cd0 100644
--- a/src/core/hle/service/am/applets/software_keyboard.cpp
+++ b/src/core/hle/service/am/applets/software_keyboard.cpp
@@ -39,8 +39,9 @@ static Core::Frontend::SoftwareKeyboardParameters ConvertToFrontendParameters(
return params;
}
-SoftwareKeyboard::SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend)
- : frontend(frontend) {}
+SoftwareKeyboard::SoftwareKeyboard(Core::System& system_,
+ const Core::Frontend::SoftwareKeyboardApplet& frontend_)
+ : Applet{system_.Kernel()}, frontend(frontend_) {}
SoftwareKeyboard::~SoftwareKeyboard() = default;
diff --git a/src/core/hle/service/am/applets/software_keyboard.h b/src/core/hle/service/am/applets/software_keyboard.h
index 0fbc43e51..ef4801fc6 100644
--- a/src/core/hle/service/am/applets/software_keyboard.h
+++ b/src/core/hle/service/am/applets/software_keyboard.h
@@ -16,6 +16,10 @@
union ResultCode;
+namespace Core {
+class System;
+}
+
namespace Service::AM::Applets {
enum class KeysetDisable : u32 {
@@ -55,7 +59,8 @@ static_assert(sizeof(KeyboardConfig) == 0x3E0, "KeyboardConfig has incorrect siz
class SoftwareKeyboard final : public Applet {
public:
- explicit SoftwareKeyboard(const Core::Frontend::SoftwareKeyboardApplet& frontend);
+ explicit SoftwareKeyboard(Core::System& system_,
+ const Core::Frontend::SoftwareKeyboardApplet& frontend_);
~SoftwareKeyboard() override;
void Initialize() override;
diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp
index f3c9fef0e..32283e819 100644
--- a/src/core/hle/service/am/applets/web_browser.cpp
+++ b/src/core/hle/service/am/applets/web_browser.cpp
@@ -190,8 +190,9 @@ std::map<WebArgTLVType, std::vector<u8>> GetWebArguments(const std::vector<u8>&
return out;
}
-FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordType type) {
- const auto& installed{Core::System::GetInstance().GetContentProvider()};
+FileSys::VirtualFile GetApplicationRomFS(const Core::System& system, u64 title_id,
+ FileSys::ContentRecordType type) {
+ const auto& installed{system.GetContentProvider()};
const auto res = installed.GetEntry(title_id, type);
if (res != nullptr) {
@@ -207,10 +208,10 @@ FileSys::VirtualFile GetApplicationRomFS(u64 title_id, FileSys::ContentRecordTyp
} // Anonymous namespace
-WebBrowser::WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id,
- Core::Frontend::ECommerceApplet* frontend_e_commerce)
- : frontend(frontend), frontend_e_commerce(frontend_e_commerce),
- current_process_title_id(current_process_title_id) {}
+WebBrowser::WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_,
+ Core::Frontend::ECommerceApplet* frontend_e_commerce_)
+ : Applet{system_.Kernel()}, frontend(frontend_),
+ frontend_e_commerce(frontend_e_commerce_), system{system_} {}
WebBrowser::~WebBrowser() = default;
@@ -266,7 +267,7 @@ void WebBrowser::UnpackRomFS() {
ASSERT(offline_romfs != nullptr);
const auto dir =
FileSys::ExtractRomFS(offline_romfs, FileSys::RomFSExtractionType::SingleDiscard);
- const auto& vfs{Core::System::GetInstance().GetFilesystem()};
+ const auto& vfs{system.GetFilesystem()};
const auto temp_dir = vfs->CreateDirectory(temporary_dir, FileSys::Mode::ReadWrite);
FileSys::VfsRawCopyD(dir, temp_dir);
@@ -470,10 +471,10 @@ void WebBrowser::InitializeOffline() {
}
if (title_id == 0) {
- title_id = current_process_title_id;
+ title_id = system.CurrentProcess()->GetTitleID();
}
- offline_romfs = GetApplicationRomFS(title_id, type);
+ offline_romfs = GetApplicationRomFS(system, title_id, type);
if (offline_romfs == nullptr) {
status = ResultCode(-1);
LOG_ERROR(Service_AM, "Failed to find offline data for request!");
diff --git a/src/core/hle/service/am/applets/web_browser.h b/src/core/hle/service/am/applets/web_browser.h
index 870f57b64..8d4027411 100644
--- a/src/core/hle/service/am/applets/web_browser.h
+++ b/src/core/hle/service/am/applets/web_browser.h
@@ -9,6 +9,10 @@
#include "core/hle/service/am/am.h"
#include "core/hle/service/am/applets/applets.h"
+namespace Core {
+class System;
+}
+
namespace Service::AM::Applets {
enum class ShimKind : u32;
@@ -17,8 +21,8 @@ enum class WebArgTLVType : u16;
class WebBrowser final : public Applet {
public:
- WebBrowser(Core::Frontend::WebBrowserApplet& frontend, u64 current_process_title_id,
- Core::Frontend::ECommerceApplet* frontend_e_commerce = nullptr);
+ WebBrowser(Core::System& system_, Core::Frontend::WebBrowserApplet& frontend_,
+ Core::Frontend::ECommerceApplet* frontend_e_commerce_ = nullptr);
~WebBrowser() override;
@@ -59,8 +63,6 @@ private:
bool unpacked = false;
ResultCode status = RESULT_SUCCESS;
- u64 current_process_title_id;
-
ShimKind kind;
std::map<WebArgTLVType, std::vector<u8>> args;
@@ -74,6 +76,8 @@ private:
std::optional<u128> user_id;
std::optional<bool> shop_full_display;
std::string shop_extra_parameter;
+
+ Core::System& system;
};
} // namespace Service::AM::Applets
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index e92e2e06e..3a5361fdd 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -258,6 +258,15 @@ ResultStatus AppLoader_NRO::ReadTitle(std::string& title) {
return ResultStatus::Success;
}
+ResultStatus AppLoader_NRO::ReadControlData(FileSys::NACP& control) {
+ if (nacp == nullptr) {
+ return ResultStatus::ErrorNoControl;
+ }
+
+ control = *nacp;
+ return ResultStatus::Success;
+}
+
bool AppLoader_NRO::IsRomFSUpdatable() const {
return false;
}
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 1ffdae805..71811bc29 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -43,6 +43,7 @@ public:
ResultStatus ReadProgramId(u64& out_program_id) override;
ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
ResultStatus ReadTitle(std::string& title) override;
+ ResultStatus ReadControlData(FileSys::NACP& control) override;
bool IsRomFSUpdatable() const override;
private:
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 08586d33c..63d449135 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <bitset>
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
@@ -49,6 +50,33 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
}
}
+Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
+ const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
+ ASSERT(cbuf_mask[regs.tex_cb_index]);
+
+ const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
+ ASSERT(texinfo.Address() != 0);
+
+ const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
+ ASSERT(address < texinfo.Address() + texinfo.size);
+
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
+ return GetTextureInfo(tex_handle, offset);
+}
+
+Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle,
+ std::size_t offset) const {
+ return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id),
+ GetTSCEntry(tex_handle.tsc_id)};
+}
+
+u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const {
+ const auto& buffer = launch_description.const_buffer_config[const_buffer];
+ u32 result;
+ std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
+ return result;
+}
+
void KeplerCompute::ProcessLaunch() {
const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
@@ -60,4 +88,29 @@ void KeplerCompute::ProcessLaunch() {
rasterizer.DispatchCompute(code_addr);
}
+Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
+ const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
+
+ Texture::TICEntry tic_entry;
+ memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
+
+ const auto r_type{tic_entry.r_type.Value()};
+ const auto g_type{tic_entry.g_type.Value()};
+ const auto b_type{tic_entry.b_type.Value()};
+ const auto a_type{tic_entry.a_type.Value()};
+
+ // TODO(Subv): Different data types for separate components are not supported
+ DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
+
+ return tic_entry;
+}
+
+Texture::TSCEntry KeplerCompute::GetTSCEntry(u32 tsc_index) const {
+ const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
+
+ Texture::TSCEntry tsc_entry;
+ memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
+ return tsc_entry;
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 6a3309a2c..90cf650d2 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -12,6 +12,7 @@
#include "common/common_types.h"
#include "video_core/engines/engine_upload.h"
#include "video_core/gpu.h"
+#include "video_core/textures/texture.h"
namespace Core {
class System;
@@ -111,7 +112,7 @@ public:
INSERT_PADDING_WORDS(0x3FE);
- u32 texture_const_buffer_index;
+ u32 tex_cb_index;
INSERT_PADDING_WORDS(0x374);
};
@@ -149,7 +150,7 @@ public:
union {
BitField<0, 8, u32> const_buffer_enable_mask;
BitField<29, 2, u32> cache_layout;
- } memory_config;
+ };
INSERT_PADDING_WORDS(0x8);
@@ -194,6 +195,14 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
+ Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const;
+
+ /// Given a Texture Handle, returns the TSC and TIC entries.
+ Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
+ std::size_t offset) const;
+
+ u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const;
+
private:
Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
@@ -201,6 +210,12 @@ private:
Upload::State upload_state;
void ProcessLaunch();
+
+ /// Retrieves information about a specific TIC entry from the TIC buffer.
+ Texture::TICEntry GetTICEntry(u32 tic_index) const;
+
+ /// Retrieves information about a specific TSC entry from the TSC buffer.
+ Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
};
#define ASSERT_REG_POSITION(field_name, position) \
@@ -218,12 +233,12 @@ ASSERT_REG_POSITION(launch, 0xAF);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
ASSERT_REG_POSITION(code_loc, 0x582);
-ASSERT_REG_POSITION(texture_const_buffer_index, 0x982);
+ASSERT_REG_POSITION(tex_cb_index, 0x982);
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8);
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC);
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11);
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12);
-ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14);
+ASSERT_LAUNCH_PARAM_POSITION(const_buffer_enable_mask, 0x14);
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D);
#undef ASSERT_REG_POSITION
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index f5158d219..c7a3c85a0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -89,6 +89,9 @@ void Maxwell3D::InitializeRegisterDefaults() {
// Commercial games seem to assume this value is enabled and nouveau sets this value manually.
regs.rt_separate_frag_data = 1;
+
+ // Some games (like Super Mario Odyssey) assume that SRGB is enabled.
+ regs.framebuffer_srgb = 1;
}
#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
@@ -244,7 +247,7 @@ void Maxwell3D::InitDirtySettings() {
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
}
-void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
+void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
// Reset the current macro.
executing_macro = 0;
@@ -252,7 +255,7 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
// Execute the current macro.
- macro_interpreter.Execute(macro_positions[entry], std::move(parameters));
+ macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -289,7 +292,8 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
// Call the macro when there are no more parameters in the command buffer
if (method_call.IsLastCall()) {
- CallMacroMethod(executing_macro, std::move(macro_params));
+ CallMacroMethod(executing_macro, macro_params.size(), macro_params.data());
+ macro_params.clear();
}
return;
}
@@ -328,6 +332,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
ProcessMacroBind(method_call.argument);
break;
}
+ case MAXWELL3D_REG_INDEX(firmware[4]): {
+ ProcessFirmwareCall4();
+ break;
+ }
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
@@ -418,6 +426,14 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
macro_positions[regs.macros.entry++] = data;
}
+void Maxwell3D::ProcessFirmwareCall4() {
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+
+ // Firmware call 4 is a blob that changes some registers depending on its parameters.
+ // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
+ regs.reg_array[0xd00] = 1;
+}
+
void Maxwell3D::ProcessQueryGet() {
const GPUVAddr sequence_address{regs.query.QueryAddress()};
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0184342a0..e5ec90717 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -62,6 +62,7 @@ public:
static constexpr std::size_t NumVertexAttributes = 32;
static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumTextureSamplers = 32;
+ static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t MaxShaderProgram = 6;
static constexpr std::size_t MaxShaderStage = 5;
@@ -1088,7 +1089,9 @@ public:
INSERT_PADDING_WORDS(14);
} shader_config[MaxShaderProgram];
- INSERT_PADDING_WORDS(0x80);
+ INSERT_PADDING_WORDS(0x60);
+
+ u32 firmware[0x20];
struct {
u32 cb_size;
@@ -1307,9 +1310,10 @@ private:
/**
* Call a macro on this engine.
* @param method Method to call
+ * @param num_parameters Number of arguments
* @param parameters Arguments to the method call
*/
- void CallMacroMethod(u32 method, std::vector<u32> parameters);
+ void CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters);
/// Handles writes to the macro uploading register.
void ProcessMacroUpload(u32 data);
@@ -1317,6 +1321,9 @@ private:
/// Handles writes to the macro bind register.
void ProcessMacroBind(u32 data);
+ /// Handles firmware blob 4
+ void ProcessFirmwareCall4();
+
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers();
@@ -1429,6 +1436,7 @@ ASSERT_REG_POSITION(vertex_array[0], 0x700);
ASSERT_REG_POSITION(independent_blend, 0x780);
ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
ASSERT_REG_POSITION(shader_config[0], 0x800);
+ASSERT_REG_POSITION(firmware, 0x8C0);
ASSERT_REG_POSITION(const_buffer, 0x8E0);
ASSERT_REG_POSITION(cb_bind[0], 0x904);
ASSERT_REG_POSITION(tex_cb_index, 0x982);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c3678b9ea..a6110bd86 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -544,6 +544,35 @@ enum class VoteOperation : u64 {
Eq = 2, // allThreadsEqualNV
};
+enum class ImageAtomicSize : u64 {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ F32 = 3,
+ S64 = 5,
+ SD32 = 6,
+ SD64 = 7,
+};
+
+enum class ImageAtomicOperation : u64 {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8,
+};
+
+enum class ShuffleOperation : u64 {
+ Idx = 0, // shuffleNV
+ Up = 1, // shuffleUpNV
+ Down = 2, // shuffleDownNV
+ Bfly = 3, // shuffleXorNV
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -578,6 +607,15 @@ union Instruction {
} vote;
union {
+ BitField<30, 2, ShuffleOperation> operation;
+ BitField<48, 3, u64> pred48;
+ BitField<28, 1, u64> is_index_imm;
+ BitField<29, 1, u64> is_mask_imm;
+ BitField<20, 5, u64> index_imm;
+ BitField<34, 13, u64> mask_imm;
+ } shfl;
+
+ union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
@@ -675,6 +713,10 @@ union Instruction {
} shift;
union {
+ BitField<39, 1, u64> wrap;
+ } shr;
+
+ union {
BitField<39, 5, u64> shift_amount;
BitField<48, 1, u64> negate_b;
BitField<49, 1, u64> negate_a;
@@ -1388,6 +1430,14 @@ union Instruction {
} sust;
union {
+ BitField<28, 1, u64> is_ba;
+ BitField<51, 3, ImageAtomicSize> size;
+ BitField<33, 3, ImageType> image_type;
+ BitField<29, 4, ImageAtomicOperation> operation;
+ BitField<49, 2, OutOfBoundsStore> out_of_bounds_store;
+ } suatom_d;
+
+ union {
BitField<20, 24, u64> target;
BitField<5, 1, u64> constant_buffer;
@@ -1508,6 +1558,7 @@ public:
BRK,
DEPBAR,
VOTE,
+ SHFL,
BFE_C,
BFE_R,
BFE_IMM,
@@ -1539,6 +1590,7 @@ public:
TMML_B, // Texture Mip Map Level
TMML, // Texture Mip Map Level
SUST, // Surface Store
+ SUATOM, // Surface Atomic Operation
EXIT,
NOP,
IPA,
@@ -1798,6 +1850,7 @@ private:
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+ INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1822,6 +1875,7 @@ private:
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11101011001-----", Id::SUST, Type::Image, "SUST"),
+ INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"),
INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"),
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index 9f59a2dc1..62afc0d11 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -14,11 +14,18 @@ namespace Tegra {
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
-void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
+void MacroInterpreter::Execute(u32 offset, std::size_t num_parameters, const u32* parameters) {
MICROPROFILE_SCOPE(MacroInterp);
Reset();
+
registers[1] = parameters[0];
- this->parameters = std::move(parameters);
+
+ if (num_parameters > parameters_capacity) {
+ parameters_capacity = num_parameters;
+ this->parameters = std::make_unique<u32[]>(num_parameters);
+ }
+ std::memcpy(this->parameters.get(), parameters, num_parameters * sizeof(u32));
+ this->num_parameters = num_parameters;
// Execute the code until we hit an exit condition.
bool keep_executing = true;
@@ -27,7 +34,7 @@ void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
}
// Assert the the macro used all the input parameters
- ASSERT(next_parameter_index == this->parameters.size());
+ ASSERT(next_parameter_index == num_parameters);
}
void MacroInterpreter::Reset() {
@@ -35,7 +42,7 @@ void MacroInterpreter::Reset() {
pc = 0;
delayed_pc = {};
method_address.raw = 0;
- parameters.clear();
+ num_parameters = 0;
// The next parameter index starts at 1, because $r1 already has the value of the first
// parameter.
next_parameter_index = 1;
@@ -124,9 +131,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
// An instruction with the Exit flag will not actually
// cause an exit if it's executed inside a delay slot.
- // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further
- // testing on the MME code.
- if (opcode.is_exit) {
+ if (opcode.is_exit && !is_delay_slot) {
// Exit has a delay slot, execute the next instruction
Step(offset, true);
return false;
@@ -229,7 +234,8 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res
}
u32 MacroInterpreter::FetchParameter() {
- return parameters.at(next_parameter_index++);
+ ASSERT(next_parameter_index < num_parameters);
+ return parameters[next_parameter_index++];
}
u32 MacroInterpreter::GetRegister(u32 register_id) const {
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index cde360288..76b6a895b 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -25,7 +25,7 @@ public:
* @param offset Offset to start execution at.
* @param parameters The parameters of the macro.
*/
- void Execute(u32 offset, std::vector<u32> parameters);
+ void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
private:
enum class Operation : u32 {
@@ -162,10 +162,12 @@ private:
MethodAddress method_address = {};
/// Input parameters of the current macro.
- std::vector<u32> parameters;
+ std::unique_ptr<u32[]> parameters;
+ std::size_t num_parameters = 0;
+ std::size_t parameters_capacity = 0;
/// Index of the next parameter that will be fetched by the 'parm' instruction.
u32 next_parameter_index = 0;
- bool carry_flag{};
+ bool carry_flag = false;
};
} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 01d89f47d..4dd08bccb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -331,7 +331,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
SetupDrawConstBuffers(stage_enum, shader);
SetupDrawGlobalMemory(stage_enum, shader);
- const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)};
+ const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
@@ -489,9 +489,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
// Assume that a surface will be written to if it is used as a framebuffer, even if
// the shader doesn't actually write to it.
texture_cache.MarkColorBufferInUse(*single_color_target);
- // Workaround for and issue in nvidia drivers
- // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
- state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
}
fbkey.is_single_buffer = true;
@@ -512,11 +509,6 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
// Assume that a surface will be written to if it is used as a framebuffer, even
// if the shader doesn't actually write to it.
texture_cache.MarkColorBufferInUse(index);
- // Enable sRGB only for supported formats
- // Workaround for and issue in nvidia drivers
- // https://devtalk.nvidia.com/default/topic/776591/opengl/gl_framebuffer_srgb-functions-incorrectly/
- state.framebuffer_srgb.enabled |=
- color_surface->GetSurfaceParams().srgb_conversion;
}
fbkey.color_attachments[index] =
@@ -801,7 +793,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
}
auto kernel = shader_cache.GetComputeKernel(code_addr);
- const auto [program, next_bindings] = kernel->GetProgramHandle({});
+ ProgramVariant variant;
+ variant.texture_buffer_usage = SetupComputeTextures(kernel);
+ SetupComputeImages(kernel);
+
+ const auto [program, next_bindings] = kernel->GetProgramHandle(variant);
state.draw.shader_program = program;
state.draw.program_pipeline = 0;
@@ -816,13 +812,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel);
- // TODO(Rodrigo): Bind images and samplers
-
buffer_cache.Unmap();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
+ state.ApplyTextures();
+ state.ApplyImages();
state.ApplyShaderProgram();
state.ApplyProgramPipeline();
@@ -902,6 +898,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
}
screen_info.display_texture = surface->GetTexture();
+ screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
return true;
}
@@ -922,7 +919,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
- const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value();
+ const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
buffer.address = config.Address();
buffer.size = config.size;
@@ -981,53 +978,125 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
}
-TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
- BaseBindings base_bindings) {
+TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stage,
+ const Shader& shader,
+ BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers;
- ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units),
+ ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures),
"Exceeded the number of active textures.");
TextureBufferUsage texture_buffer_usage{0};
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- Tegra::Texture::FullTextureInfo texture;
- if (entry.IsBindless()) {
+ const auto texture = [&]() {
+ if (!entry.IsBindless()) {
+ return maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ }
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
- texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
- } else {
- texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+ }();
+
+ if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
+ texture_buffer_usage.set(bindpoint);
}
- const u32 current_bindpoint = base_bindings.sampler + bindpoint;
+ }
- auto& unit{state.texture_units[current_bindpoint]};
- unit.sampler = sampler_cache.GetSampler(texture.tsc);
+ return texture_buffer_usage;
+}
- if (const auto view{texture_cache.GetTextureSurface(texture, entry)}; view) {
- if (view->GetSurfaceParams().IsBuffer()) {
- // Record that this texture is a texture buffer.
- texture_buffer_usage.set(bindpoint);
- } else {
- // Apply swizzle to textures that are not buffers.
- view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
- texture.tic.w_source);
+TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
+ MICROPROFILE_SCOPE(OpenGL_Texture);
+ const auto& compute = system.GPU().KeplerCompute();
+ const auto& entries = kernel->GetShaderEntries().samplers;
+
+ ASSERT_MSG(entries.size() <= std::size(state.textures),
+ "Exceeded the number of active textures.");
+
+ TextureBufferUsage texture_buffer_usage{0};
+
+ for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ const auto texture = [&]() {
+ if (!entry.IsBindless()) {
+ return compute.GetTexture(entry.GetOffset());
}
- state.texture_units[current_bindpoint].texture = view->GetTexture();
- } else {
- // Can occur when texture addr is null or its memory is unmapped/invalid
- unit.texture = 0;
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
+ return compute.GetTextureInfo(tex_handle, entry.GetOffset());
+ }();
+
+ if (SetupTexture(bindpoint, texture, entry)) {
+ texture_buffer_usage.set(bindpoint);
}
}
return texture_buffer_usage;
}
+bool RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+ const GLShader::SamplerEntry& entry) {
+ state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
+
+ const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
+ if (!view) {
+ // Can occur when texture addr is null or its memory is unmapped/invalid
+ state.textures[binding] = 0;
+ return false;
+ }
+ state.textures[binding] = view->GetTexture();
+
+ if (view->GetSurfaceParams().IsBuffer()) {
+ return true;
+ }
+
+ // Apply swizzle to textures that are not buffers.
+ view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
+ texture.tic.w_source);
+ return false;
+}
+
+void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
+ const auto& compute = system.GPU().KeplerCompute();
+ const auto& entries = shader->GetShaderEntries().images;
+ for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
+ const auto& entry = entries[bindpoint];
+ const auto tic = [&]() {
+ if (!entry.IsBindless()) {
+ return compute.GetTexture(entry.GetOffset()).tic;
+ }
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
+ return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
+ }();
+ SetupImage(bindpoint, tic, entry);
+ }
+}
+
+void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
+ const GLShader::ImageEntry& entry) {
+ const auto view = texture_cache.GetImageSurface(tic, entry);
+ if (!view) {
+ state.images[binding] = 0;
+ return;
+ }
+ if (!tic.IsBuffer()) {
+ view->ApplySwizzle(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
+ }
+ if (entry.IsWritten()) {
+ view->MarkAsModified(texture_cache.Tick());
+ }
+ state.images[binding] = view->GetTexture();
+}
+
void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
const auto& regs = system.GPU().Maxwell3D().regs;
const bool geometry_shaders_enabled =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9d20a4fbf..eada752e0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -32,6 +32,7 @@
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
+#include "video_core/textures/texture.h"
namespace Core {
class System;
@@ -137,8 +138,22 @@ private:
/// Configures the current textures to use for the draw command. Returns shaders texture buffer
/// usage.
- TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, BaseBindings base_bindings);
+ TextureBufferUsage SetupDrawTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader, BaseBindings base_bindings);
+
+ /// Configures the textures used in a compute shader. Returns texture buffer usage.
+ TextureBufferUsage SetupComputeTextures(const Shader& kernel);
+
+ /// Configures a texture. Returns true when the texture is a texture buffer.
+ bool SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
+ const GLShader::SamplerEntry& entry);
+
+ /// Configures images in a compute shader.
+ void SetupComputeImages(const Shader& shader);
+
+ /// Configures an image.
+ void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
+ const GLShader::ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 909ccb82c..0dbc4c02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_NV_gpu_shader5 : enable\n"
- "#extension GL_NV_shader_thread_group : enable\n";
+ "#extension GL_NV_shader_thread_group : enable\n"
+ "#extension GL_NV_shader_thread_shuffle : enable\n";
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4a8c7edc9..76439e7ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -325,6 +325,7 @@ public:
DeclareRegisters();
DeclarePredicates();
DeclareLocalMemory();
+ DeclareSharedMemory();
DeclareInternalFlags();
DeclareInputAttributes();
DeclareOutputAttributes();
@@ -389,11 +390,10 @@ public:
for (const auto& sampler : ir.GetSamplers()) {
entries.samplers.emplace_back(sampler);
}
- for (const auto& image : ir.GetImages()) {
+ for (const auto& [offset, image] : ir.GetImages()) {
entries.images.emplace_back(image);
}
- for (const auto& gmem_pair : ir.GetGlobalMemory()) {
- const auto& [base, usage] = gmem_pair;
+ for (const auto& [base, usage] : ir.GetGlobalMemory()) {
entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
usage.is_read, usage.is_written);
}
@@ -500,6 +500,13 @@ private:
code.AddNewLine();
}
+ void DeclareSharedMemory() {
+ if (stage != ProgramType::Compute) {
+ return;
+ }
+ code.AddLine("shared uint {}[];", GetSharedMemory());
+ }
+
void DeclareInternalFlags() {
for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) {
const auto flag_code = static_cast<InternalFlag>(flag);
@@ -706,8 +713,8 @@ private:
void DeclareImages() {
const auto& images{ir.GetImages()};
- for (const auto& image : images) {
- const std::string image_type = [&]() {
+ for (const auto& [offset, image] : images) {
+ const char* image_type = [&] {
switch (image.GetType()) {
case Tegra::Shader::ImageType::Texture1D:
return "image1D";
@@ -726,9 +733,33 @@ private:
return "image1D";
}
}();
- code.AddLine("layout (binding = IMAGE_BINDING_{}) coherent volatile writeonly uniform "
+
+ const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> {
+ if (!image.IsSizeKnown()) {
+ return {"", ""};
+ }
+ switch (image.GetSize()) {
+ case Tegra::Shader::ImageAtomicSize::U32:
+ return {"u", "r32ui, "};
+ case Tegra::Shader::ImageAtomicSize::S32:
+ return {"i", "r32i, "};
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented atomic size={}",
+ static_cast<u32>(image.GetSize()));
+ return {"", ""};
+ }
+ }();
+
+ std::string qualifier = "coherent volatile";
+ if (image.IsRead() && !image.IsWritten()) {
+ qualifier += " readonly";
+ } else if (image.IsWritten() && !image.IsRead()) {
+ qualifier += " writeonly";
+ }
+
+ code.AddLine("layout (binding = IMAGE_BINDING_{}) {} uniform "
"{} {};",
- image.GetIndex(), image_type, GetImage(image));
+ image.GetIndex(), qualifier, image_type, GetImage(image));
}
if (!images.empty()) {
code.AddNewLine();
@@ -858,6 +889,12 @@ private:
Type::Uint};
}
+ if (const auto smem = std::get_if<SmemNode>(&*node)) {
+ return {
+ fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
+ Type::Uint};
+ }
+
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
}
@@ -1174,6 +1211,74 @@ private:
return expr;
}
+ std::string BuildIntegerCoordinates(Operation operation) {
+ constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+ const std::size_t coords_count{operation.GetOperandsCount()};
+ std::string expr = constructors.at(coords_count - 1);
+ for (std::size_t i = 0; i < coords_count; ++i) {
+ expr += VisitOperand(operation, i).AsInt();
+ if (i + 1 < coords_count) {
+ expr += ", ";
+ }
+ }
+ expr += ')';
+ return expr;
+ }
+
+ std::string BuildImageValues(Operation operation) {
+ const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> {
+ constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"};
+ if (!meta.image.IsSizeKnown()) {
+ return {float_constructors, Type::Float};
+ }
+ switch (meta.image.GetSize()) {
+ case Tegra::Shader::ImageAtomicSize::U32:
+ return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint};
+ case Tegra::Shader::ImageAtomicSize::S32:
+ return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint};
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented image size={}",
+ static_cast<u32>(meta.image.GetSize()));
+ return {float_constructors, Type::Float};
+ }
+ }();
+
+ const std::size_t values_count{meta.values.size()};
+ std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
+ for (std::size_t i = 0; i < values_count; ++i) {
+ expr += Visit(meta.values.at(i)).As(type);
+ if (i + 1 < values_count) {
+ expr += ", ";
+ }
+ }
+ expr += ')';
+ return expr;
+ }
+
+ Expression AtomicImage(Operation operation, const char* opname) {
+ constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+ const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ ASSERT(meta.values.size() == 1);
+ ASSERT(meta.image.IsSizeKnown());
+
+ const auto type = [&]() {
+ switch (const auto size = meta.image.GetSize()) {
+ case Tegra::Shader::ImageAtomicSize::U32:
+ return Type::Uint;
+ case Tegra::Shader::ImageAtomicSize::S32:
+ return Type::Int;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size));
+ return Type::Uint;
+ }
+ }();
+
+ return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image),
+ BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)),
+ type};
+ }
+
Expression Assign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -1199,6 +1304,11 @@ private:
target = {
fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
Type::Uint};
+ } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
+ ASSERT(stage == ProgramType::Compute);
+ target = {
+ fmt::format("{}[{} >> 2]", GetSharedMemory(), Visit(smem->GetAddress()).AsUint()),
+ Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress()).AsUint();
const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
@@ -1692,36 +1802,37 @@ private:
}
Expression ImageStore(Operation operation) {
- constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
+ code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
+ BuildIntegerCoordinates(operation), BuildImageValues(operation));
+ return {};
+ }
- std::string expr = "imageStore(";
- expr += GetImage(meta.image);
- expr += ", ";
+ Expression AtomicImageAdd(Operation operation) {
+ return AtomicImage(operation, "imageAtomicAdd");
+ }
- const std::size_t coords_count{operation.GetOperandsCount()};
- expr += constructors.at(coords_count - 1);
- for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- if (i + 1 < coords_count) {
- expr += ", ";
- }
- }
- expr += "), ";
+ Expression AtomicImageMin(Operation operation) {
+ return AtomicImage(operation, "imageAtomicMin");
+ }
- const std::size_t values_count{meta.values.size()};
- UNIMPLEMENTED_IF(values_count != 4);
- expr += "vec4(";
- for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i)).AsFloat();
- if (i + 1 < values_count) {
- expr += ", ";
- }
- }
- expr += "));";
+ Expression AtomicImageMax(Operation operation) {
+ return AtomicImage(operation, "imageAtomicMax");
+ }
+ Expression AtomicImageAnd(Operation operation) {
+ return AtomicImage(operation, "imageAtomicAnd");
+ }
- code.AddLine(expr);
- return {};
+ Expression AtomicImageOr(Operation operation) {
+ return AtomicImage(operation, "imageAtomicOr");
+ }
+
+ Expression AtomicImageXor(Operation operation) {
+ return AtomicImage(operation, "imageAtomicXor");
+ }
+
+ Expression AtomicImageExchange(Operation operation) {
+ return AtomicImage(operation, "imageAtomicExchange");
}
Expression Branch(Operation operation) {
@@ -1846,8 +1957,7 @@ private:
Expression BallotThread(Operation operation) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia warp intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub on non-Nvidia devices by simulating all threads voting the same as the active
// one.
return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
@@ -1858,8 +1968,7 @@ private:
Expression Vote(Operation operation, const char* func) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub with a warp size of one.
return {value, Type::Bool};
}
@@ -1876,15 +1985,54 @@ private:
Expression VoteEqual(Operation operation) {
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
- // We must return true here since a stub for a theoretical warp size of 1 will always
- // return an equal result for all its votes.
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
+ // We must return true here since a stub for a theoretical warp size of 1.
+ // This will always return an equal result across all votes.
return {"true", Type::Bool};
}
return Vote(operation, "allThreadsEqualNV");
}
+ template <const std::string_view& func>
+ Expression Shuffle(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsFloat();
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
+ // On a "single-thread" device we are either on the same thread or out of bounds. Both
+ // cases return the passed value.
+ return {value, Type::Float};
+ }
+
+ const std::string index = VisitOperand(operation, 1).AsUint();
+ const std::string width = VisitOperand(operation, 2).AsUint();
+ return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ }
+
+ template <const std::string_view& func>
+ Expression InRangeShuffle(Operation operation) {
+ const std::string index = VisitOperand(operation, 0).AsUint();
+ const std::string width = VisitOperand(operation, 1).AsUint();
+ if (!device.HasWarpIntrinsics()) {
+ // On a "single-thread" device we are only in bounds when the requested index is 0.
+ return {fmt::format("({} == 0U)", index), Type::Bool};
+ }
+
+ const std::string in_range = code.GenerateTemporary();
+ code.AddLine("bool {};", in_range);
+ code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
+ return {in_range, Type::Bool};
+ }
+
+ struct Func final {
+ Func() = delete;
+ ~Func() = delete;
+
+ static constexpr std::string_view ShuffleIndexed = "shuffleNV";
+ static constexpr std::string_view ShuffleUp = "shuffleUpNV";
+ static constexpr std::string_view ShuffleDown = "shuffleDownNV";
+ static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
+ };
+
static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
@@ -2017,6 +2165,13 @@ private:
&GLSLDecompiler::TexelFetch,
&GLSLDecompiler::ImageStore,
+ &GLSLDecompiler::AtomicImageAdd,
+ &GLSLDecompiler::AtomicImageMin,
+ &GLSLDecompiler::AtomicImageMax,
+ &GLSLDecompiler::AtomicImageAnd,
+ &GLSLDecompiler::AtomicImageOr,
+ &GLSLDecompiler::AtomicImageXor,
+ &GLSLDecompiler::AtomicImageExchange,
&GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect,
@@ -2040,6 +2195,16 @@ private:
&GLSLDecompiler::VoteAll,
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
+
+ &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
+
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -2080,6 +2245,10 @@ private:
return "lmem_" + suffix;
}
+ std::string GetSharedMemory() const {
+ return fmt::format("smem_{}", suffix);
+ }
+
std::string GetInternalFlag(InternalFlag flag) const {
constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
"overflow_flag"};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 969fe9ced..f141c4e3b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -341,13 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u64 index{};
u32 type{};
u8 is_bindless{};
+ u8 is_written{};
+ u8 is_read{};
+ u8 is_size_known{};
+ u32 size{};
if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
- !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless)) {
+ !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
+ !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
+ !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) {
return {};
}
entry.entries.images.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0);
+ static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
+ is_read != 0,
+ is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size))
+ : std::nullopt);
}
u32 global_memory_count{};
@@ -426,10 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
return false;
}
for (const auto& image : entries.images) {
+ const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U;
if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
!SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
!SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
- !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0))) {
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
+ !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) {
return false;
}
}
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index f4777d0b0..bf86b5a0b 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -16,7 +16,6 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
OpenGLState OpenGLState::cur_state;
-bool OpenGLState::s_rgb_used;
namespace {
@@ -34,6 +33,25 @@ bool UpdateTie(T1 current_value, const T2 new_value) {
return changed;
}
+template <typename T>
+std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
+ std::optional<std::size_t> first;
+ std::size_t last;
+ for (std::size_t i = 0; i < std::size(current_values); ++i) {
+ if (!UpdateValue(current_values[i], new_values[i])) {
+ continue;
+ }
+ if (!first) {
+ first = i;
+ }
+ last = i;
+ }
+ if (!first) {
+ return std::nullopt;
+ }
+ return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
+}
+
void Enable(GLenum cap, bool enable) {
if (enable) {
glEnable(cap);
@@ -134,10 +152,6 @@ OpenGLState::OpenGLState() {
logic_op.enabled = false;
logic_op.operation = GL_COPY;
- for (auto& texture_unit : texture_units) {
- texture_unit.Reset();
- }
-
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
@@ -267,8 +281,6 @@ void OpenGLState::ApplySRgb() const {
return;
cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
if (framebuffer_srgb.enabled) {
- // Track if sRGB is used
- s_rgb_used = true;
glEnable(GL_FRAMEBUFFER_SRGB);
} else {
glDisable(GL_FRAMEBUFFER_SRGB);
@@ -496,52 +508,20 @@ void OpenGLState::ApplyAlphaTest() const {
}
void OpenGLState::ApplyTextures() const {
- bool has_delta{};
- std::size_t first{};
- std::size_t last{};
- std::array<GLuint, Maxwell::NumTextureSamplers> textures;
-
- for (std::size_t i = 0; i < std::size(texture_units); ++i) {
- const auto& texture_unit = texture_units[i];
- auto& cur_state_texture_unit = cur_state.texture_units[i];
- textures[i] = texture_unit.texture;
- if (cur_state_texture_unit.texture == textures[i]) {
- continue;
- }
- cur_state_texture_unit.texture = textures[i];
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
- }
- if (has_delta) {
- glBindTextures(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- textures.data() + first);
+ if (const auto update = UpdateArray(cur_state.textures, textures)) {
+ glBindTextures(update->first, update->second, textures.data() + update->first);
}
}
void OpenGLState::ApplySamplers() const {
- bool has_delta{};
- std::size_t first{};
- std::size_t last{};
- std::array<GLuint, Maxwell::NumTextureSamplers> samplers;
-
- for (std::size_t i = 0; i < std::size(samplers); ++i) {
- samplers[i] = texture_units[i].sampler;
- if (cur_state.texture_units[i].sampler == texture_units[i].sampler) {
- continue;
- }
- cur_state.texture_units[i].sampler = texture_units[i].sampler;
- if (!has_delta) {
- first = i;
- has_delta = true;
- }
- last = i;
+ if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
+ glBindSamplers(update->first, update->second, samplers.data() + update->first);
}
- if (has_delta) {
- glBindSamplers(static_cast<GLuint>(first), static_cast<GLsizei>(last - first + 1),
- samplers.data() + first);
+}
+
+void OpenGLState::ApplyImages() const {
+ if (const auto update = UpdateArray(cur_state.images, images)) {
+ glBindImageTextures(update->first, update->second, images.data() + update->first);
}
}
@@ -576,6 +556,7 @@ void OpenGLState::Apply() {
ApplyLogicOp();
ApplyTextures();
ApplySamplers();
+ ApplyImages();
if (dirty.polygon_offset) {
ApplyPolygonOffset();
dirty.polygon_offset = false;
@@ -606,18 +587,18 @@ void OpenGLState::EmulateViewportWithScissor() {
}
OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
- for (auto& unit : texture_units) {
- if (unit.texture == handle) {
- unit.Unbind();
+ for (auto& texture : textures) {
+ if (texture == handle) {
+ texture = 0;
}
}
return *this;
}
OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
- for (auto& unit : texture_units) {
- if (unit.sampler == handle) {
- unit.sampler = 0;
+ for (auto& sampler : samplers) {
+ if (sampler == handle) {
+ sampler = 0;
}
}
return *this;
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index fdf9a8a12..c358d3b38 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -118,21 +118,9 @@ public:
GLenum operation;
} logic_op;
- // 3 texture units - one for each that is used in PICA fragment shader emulation
- struct TextureUnit {
- GLuint texture; // GL_TEXTURE_BINDING_2D
- GLuint sampler; // GL_SAMPLER_BINDING
-
- void Unbind() {
- texture = 0;
- }
-
- void Reset() {
- Unbind();
- sampler = 0;
- }
- };
- std::array<TextureUnit, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_units;
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{};
+ std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{};
struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
@@ -187,14 +175,6 @@ public:
return cur_state;
}
- static bool GetsRGBUsed() {
- return s_rgb_used;
- }
-
- static void ClearsRGBUsed() {
- s_rgb_used = false;
- }
-
void SetDefaultViewports();
/// Apply this state as the current OpenGL state
void Apply();
@@ -220,6 +200,7 @@ public:
void ApplyLogicOp() const;
void ApplyTextures() const;
void ApplySamplers() const;
+ void ApplyImages() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
void ApplyAlphaTest() const;
@@ -264,8 +245,6 @@ public:
private:
static OpenGLState cur_state;
- // Workaround for sRGB problems caused by QT not supporting srgb output
- static bool s_rgb_used;
struct {
bool blend_state;
bool stencil_state;
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 21324488a..8e13ab38b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -78,6 +78,17 @@ public:
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER
void Attach(GLenum attachment, GLenum target) const;
+ void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
+ Tegra::Texture::SwizzleSource y_source,
+ Tegra::Texture::SwizzleSource z_source,
+ Tegra::Texture::SwizzleSource w_source);
+
+ void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+
+ void MarkAsModified(u64 tick) {
+ surface.MarkAsModified(true, tick);
+ }
+
GLuint GetTexture() const {
if (is_proxy) {
return surface.GetTexture();
@@ -89,13 +100,6 @@ public:
return surface.GetSurfaceParams();
}
- void ApplySwizzle(Tegra::Texture::SwizzleSource x_source,
- Tegra::Texture::SwizzleSource y_source,
- Tegra::Texture::SwizzleSource z_source,
- Tegra::Texture::SwizzleSource w_source);
-
- void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
-
private:
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source,
Tegra::Texture::SwizzleSource y_source,
@@ -111,8 +115,8 @@ private:
GLenum target{};
OGLTextureView texture_view;
- u32 swizzle;
- bool is_proxy;
+ u32 swizzle{};
+ bool is_proxy{};
};
class TextureCacheOpenGL final : public TextureCacheBase {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index af9684839..1e6ef66ab 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -264,7 +264,6 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
}
@@ -342,21 +341,17 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
}};
- state.texture_units[0].texture = screen_info.display_texture;
- // Workaround brigthness problems in SMO by enabling sRGB in the final output
- // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
- state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
+ state.textures[0] = screen_info.display_texture;
+ state.framebuffer_srgb.enabled = screen_info.display_srgb;
state.AllDirty();
state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
// Restore default state
state.framebuffer_srgb.enabled = false;
- state.texture_units[0].texture = 0;
+ state.textures[0] = 0;
state.AllDirty();
state.Apply();
- // Clear sRGB state for the next frame
- OpenGLState::ClearsRGBUsed();
}
/**
@@ -406,8 +401,8 @@ void RendererOpenGL::CaptureScreenshot() {
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
- glRenderbufferStorage(GL_RENDERBUFFER, state.GetsRGBUsed() ? GL_SRGB8 : GL_RGB8, layout.width,
- layout.height);
+ glRenderbufferStorage(GL_RENDERBUFFER, screen_info.display_srgb ? GL_SRGB8 : GL_RGB8,
+ layout.width, layout.height);
glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer);
DrawScreen(layout);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 9bd086368..cf26628ca 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -38,7 +38,8 @@ struct TextureInfo {
/// Structure used for storing information about the display target for the Switch screen
struct ScreenInfo {
- GLuint display_texture;
+ GLuint display_texture{};
+ bool display_srgb{};
const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f};
TextureInfo texture;
};
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 3b966ddc3..897cbb4e8 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -2,9 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <map>
+#include <bitset>
#include <optional>
#include <set>
+#include <string_view>
#include <vector>
#include "common/assert.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -12,13 +13,32 @@
namespace Vulkan {
+namespace {
+
+template <typename T>
+void SetNext(void**& next, T& data) {
+ *next = &data;
+ next = &data.pNext;
+}
+
+template <typename T>
+T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) {
+ vk::PhysicalDeviceFeatures2 features;
+ T extension_features;
+ features.pNext = &extension_features;
+ physical.getFeatures2(&features, dldi);
+ return extension_features;
+}
+
+} // Anonymous namespace
+
namespace Alternatives {
-constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
- vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
-constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
- vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
-constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
+constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint,
+ vk::Format::eD16UnormS8Uint, vk::Format{}};
+constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint, vk::Format{}};
+constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}};
} // namespace Alternatives
@@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
VKDevice::~VKDevice() = default;
bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
- vk::PhysicalDeviceFeatures device_features;
- device_features.vertexPipelineStoresAndAtomics = true;
- device_features.independentBlend = true;
- device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
-
const auto queue_cis = GetDeviceQueueCreateInfos();
- const std::vector<const char*> extensions = LoadExtensions(dldi);
- const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
- 0, nullptr, static_cast<u32>(extensions.size()),
- extensions.data(), &device_features);
+ const std::vector extensions = LoadExtensions(dldi);
+
+ vk::PhysicalDeviceFeatures2 features2;
+ void** next = &features2.pNext;
+ auto& features = features2.features;
+ features.vertexPipelineStoresAndAtomics = true;
+ features.independentBlend = true;
+ features.depthClamp = true;
+ features.samplerAnisotropy = true;
+ features.largePoints = true;
+ features.textureCompressionASTC_LDR = is_optimal_astc_supported;
+
+ vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor;
+ vertex_divisor.vertexAttributeInstanceRateDivisor = true;
+ vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true;
+ SetNext(next, vertex_divisor);
+
+ vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
+ if (is_float16_supported) {
+ float16_int8.shaderFloat16 = true;
+ SetNext(next, float16_int8);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
+ }
+
+ vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
+ if (khr_uniform_buffer_standard_layout) {
+ std430_layout.uniformBufferStandardLayout = true;
+ SetNext(next, std430_layout);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs");
+ }
+
+ vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8;
+ if (ext_index_type_uint8) {
+ index_type_uint8.indexTypeUint8 = true;
+ SetNext(next, index_type_uint8);
+ } else {
+ LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
+ }
+
+ vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0,
+ nullptr, static_cast<u32>(extensions.size()), extensions.data(),
+ nullptr);
+ device_ci.pNext = &features2;
+
vk::Device dummy_logical;
if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!");
@@ -78,6 +135,17 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
logical = UniqueDevice(
dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld));
+ if (khr_driver_properties) {
+ vk::PhysicalDeviceDriverPropertiesKHR driver;
+ vk::PhysicalDeviceProperties2 properties;
+ properties.pNext = &driver;
+ physical.getProperties2(&properties, dld);
+ driver_id = driver.driverID;
+ LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo);
+ } else {
+ LOG_INFO(Render_Vulkan, "Driver: Unknown");
+ }
+
graphics_queue = logical->getQueue(graphics_family, 0, dld);
present_queue = logical->getQueue(present_family, 0, dld);
return true;
@@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
// The wanted format is not supported by hardware, search for alternatives
const vk::Format* alternatives = GetFormatAlternatives(wanted_format);
if (alternatives == nullptr) {
- LOG_CRITICAL(Render_Vulkan,
- "Format={} with usage={} and type={} has no defined alternatives and host "
- "hardware does not support it",
- vk::to_string(wanted_format), vk::to_string(wanted_usage),
- static_cast<u32>(format_type));
- UNREACHABLE();
+ UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host "
+ "hardware does not support it",
+ vk::to_string(wanted_format), vk::to_string(wanted_usage),
+ static_cast<u32>(format_type));
return wanted_format;
}
std::size_t i = 0;
for (vk::Format alternative = alternatives[0]; alternative != vk::Format{};
alternative = alternatives[++i]) {
- if (!IsFormatSupported(alternative, wanted_usage, format_type))
+ if (!IsFormatSupported(alternative, wanted_usage, format_type)) {
continue;
+ }
LOG_WARNING(Render_Vulkan,
"Emulating format={} with alternative format={} with usage={} and type={}",
static_cast<u32>(wanted_format), static_cast<u32>(alternative),
@@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
}
// No alternatives found, panic
- LOG_CRITICAL(Render_Vulkan,
- "Format={} with usage={} and type={} is not supported by the host hardware and "
- "doesn't support any of the alternatives",
- static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
- static_cast<u32>(format_type));
- UNREACHABLE();
+ UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and "
+ "doesn't support any of the alternatives",
+ static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+ static_cast<u32>(format_type));
return wanted_format;
}
@@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features
vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eTransferDst};
- constexpr std::array<vk::Format, 9> astc_formats = {
+ constexpr std::array astc_formats = {
vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock,
vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock,
@@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
FormatType format_type) const {
const auto it = format_properties.find(wanted_format);
if (it == format_properties.end()) {
- LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format));
return true;
}
- const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type);
+ const auto supported_usage = GetFormatFeatures(it->second, format_type);
return (supported_usage & wanted_usage) == wanted_usage;
}
bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
vk::SurfaceKHR surface) {
- bool has_swapchain{};
+ LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName);
+ bool is_suitable = true;
+
+ constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME};
+ std::bitset<required_extensions.size()> available_extensions{};
+
for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+ if (available_extensions[i]) {
+ continue;
+ }
+ available_extensions[i] =
+ required_extensions[i] == std::string_view{prop.extensionName};
+ }
}
- if (!has_swapchain) {
- // The device doesn't support creating swapchains.
- return false;
+ if (!available_extensions.all()) {
+ for (std::size_t i = 0; i < required_extensions.size(); ++i) {
+ if (available_extensions[i]) {
+ continue;
+ }
+ LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]);
+ is_suitable = false;
+ }
}
bool has_graphics{}, has_present{};
const auto queue_family_properties = physical.getQueueFamilyProperties(dldi);
for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) {
const auto& family = queue_family_properties[i];
- if (family.queueCount == 0)
+ if (family.queueCount == 0) {
continue;
-
+ }
has_graphics |=
(family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0);
has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0;
}
if (!has_graphics || !has_present) {
- // The device doesn't have a graphics and present queue.
- return false;
+ LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue");
+ is_suitable = false;
}
// TODO(Rodrigo): Check if the device matches all requeriments.
const auto properties{physical.getProperties(dldi)};
- const auto limits{properties.limits};
- if (limits.maxUniformBufferRange < 65536) {
- return false;
+ const auto& limits{properties.limits};
+
+ constexpr u32 required_ubo_size = 65536;
+ if (limits.maxUniformBufferRange < required_ubo_size) {
+ LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)",
+ limits.maxUniformBufferRange, required_ubo_size);
+ is_suitable = false;
}
- const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
- if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
- return false;
+ const auto features{physical.getFeatures(dldi)};
+ const std::array feature_report = {
+ std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
+ std::make_pair(features.independentBlend, "independentBlend"),
+ std::make_pair(features.depthClamp, "depthClamp"),
+ std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
+ std::make_pair(features.largePoints, "largePoints"),
+ };
+ for (const auto& [supported, name] : feature_report) {
+ if (supported) {
+ continue;
+ }
+ LOG_INFO(Render_Vulkan, "Missing required feature: {}", name);
+ is_suitable = false;
}
- // Device is suitable.
- return true;
+ return is_suitable;
}
std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
std::vector<const char*> extensions;
- extensions.reserve(2);
+ extensions.reserve(7);
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+ extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
const auto Test = [&](const vk::ExtensionProperties& extension,
std::optional<std::reference_wrapper<bool>> status, const char* name,
- u32 revision) {
- if (extension.extensionName != std::string(name)) {
+ bool push) {
+ if (extension.extensionName != std::string_view(name)) {
return;
}
- extensions.push_back(name);
+ if (push) {
+ extensions.push_back(name);
+ }
if (status) {
status->get() = true;
}
};
+ bool khr_shader_float16_int8{};
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
- Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
+ Test(extension, khr_uniform_buffer_standard_layout,
+ VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
+ Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
+ Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true);
+ Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
+ }
+
+ if (khr_shader_float16_int8) {
+ is_float16_supported =
+ GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
+ extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
}
return extensions;
@@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
}
void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
- const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
+ const auto props = physical.getProperties(dldi);
device_type = props.deviceType;
uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+ storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment);
max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
}
@@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
return queue_cis;
}
-std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
+std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
- static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
- vk::Format::eB5G6R5UnormPack16,
- vk::Format::eA2B10G10R10UnormPack32,
- vk::Format::eR32G32B32A32Sfloat,
- vk::Format::eR16G16Unorm,
- vk::Format::eR16G16Snorm,
- vk::Format::eR8G8B8A8Srgb,
- vk::Format::eR8Unorm,
- vk::Format::eB10G11R11UfloatPack32,
- vk::Format::eR32Sfloat,
- vk::Format::eR16Sfloat,
- vk::Format::eR16G16B16A16Sfloat,
- vk::Format::eD32Sfloat,
- vk::Format::eD16Unorm,
- vk::Format::eD16UnormS8Uint,
- vk::Format::eD24UnormS8Uint,
- vk::Format::eD32SfloatS8Uint,
- vk::Format::eBc1RgbaUnormBlock,
- vk::Format::eBc2UnormBlock,
- vk::Format::eBc3UnormBlock,
- vk::Format::eBc4UnormBlock,
- vk::Format::eBc5UnormBlock,
- vk::Format::eBc5SnormBlock,
- vk::Format::eBc7UnormBlock,
- vk::Format::eAstc4x4UnormBlock,
- vk::Format::eAstc4x4SrgbBlock,
- vk::Format::eAstc8x8SrgbBlock,
- vk::Format::eAstc8x6SrgbBlock,
- vk::Format::eAstc5x4SrgbBlock,
- vk::Format::eAstc5x5UnormBlock,
- vk::Format::eAstc5x5SrgbBlock,
- vk::Format::eAstc10x8UnormBlock,
- vk::Format::eAstc10x8SrgbBlock};
- std::map<vk::Format, vk::FormatProperties> format_properties;
+ constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+ vk::Format::eA8B8G8R8SnormPack32,
+ vk::Format::eA8B8G8R8SrgbPack32,
+ vk::Format::eB5G6R5UnormPack16,
+ vk::Format::eA2B10G10R10UnormPack32,
+ vk::Format::eR32G32B32A32Sfloat,
+ vk::Format::eR16G16B16A16Uint,
+ vk::Format::eR16G16Unorm,
+ vk::Format::eR16G16Snorm,
+ vk::Format::eR16G16Sfloat,
+ vk::Format::eR16Unorm,
+ vk::Format::eR8G8B8A8Srgb,
+ vk::Format::eR8G8Unorm,
+ vk::Format::eR8G8Snorm,
+ vk::Format::eR8Unorm,
+ vk::Format::eB10G11R11UfloatPack32,
+ vk::Format::eR32Sfloat,
+ vk::Format::eR16Sfloat,
+ vk::Format::eR16G16B16A16Sfloat,
+ vk::Format::eB8G8R8A8Unorm,
+ vk::Format::eD32Sfloat,
+ vk::Format::eD16Unorm,
+ vk::Format::eD16UnormS8Uint,
+ vk::Format::eD24UnormS8Uint,
+ vk::Format::eD32SfloatS8Uint,
+ vk::Format::eBc1RgbaUnormBlock,
+ vk::Format::eBc2UnormBlock,
+ vk::Format::eBc3UnormBlock,
+ vk::Format::eBc4UnormBlock,
+ vk::Format::eBc5UnormBlock,
+ vk::Format::eBc5SnormBlock,
+ vk::Format::eBc7UnormBlock,
+ vk::Format::eBc1RgbaSrgbBlock,
+ vk::Format::eBc3SrgbBlock,
+ vk::Format::eBc7SrgbBlock,
+ vk::Format::eAstc4x4UnormBlock,
+ vk::Format::eAstc4x4SrgbBlock,
+ vk::Format::eAstc8x8SrgbBlock,
+ vk::Format::eAstc8x6SrgbBlock,
+ vk::Format::eAstc5x4SrgbBlock,
+ vk::Format::eAstc5x5UnormBlock,
+ vk::Format::eAstc5x5SrgbBlock,
+ vk::Format::eAstc10x8UnormBlock,
+ vk::Format::eAstc10x8SrgbBlock};
+ std::unordered_map<vk::Format, vk::FormatProperties> format_properties;
for (const auto format : formats) {
format_properties.emplace(format, physical.getFormatProperties(format, dldi));
}
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 537825d8b..010d4c3d6 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -4,7 +4,7 @@
#pragma once
-#include <map>
+#include <unordered_map>
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
@@ -69,16 +69,26 @@ public:
return present_family;
}
- /// Returns if the device is integrated with the host CPU.
+ /// Returns true if the device is integrated with the host CPU.
bool IsIntegrated() const {
return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
}
+ /// Returns the driver ID.
+ vk::DriverIdKHR GetDriverID() const {
+ return driver_id;
+ }
+
/// Returns uniform buffer alignment requeriment.
u64 GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
+ /// Returns storage alignment requeriment.
+ u64 GetStorageBufferAlignment() const {
+ return storage_buffer_alignment;
+ }
+
/// Returns the maximum range for storage buffers.
u64 GetMaxStorageBufferRange() const {
return max_storage_buffer_range;
@@ -89,9 +99,19 @@ public:
return is_optimal_astc_supported;
}
+ /// Returns true if the device supports float16 natively
+ bool IsFloat16Supported() const {
+ return is_float16_supported;
+ }
+
/// Returns true if the device supports VK_EXT_scalar_block_layout.
- bool IsExtScalarBlockLayoutSupported() const {
- return ext_scalar_block_layout;
+ bool IsKhrUniformBufferStandardLayoutSupported() const {
+ return khr_uniform_buffer_standard_layout;
+ }
+
+ /// Returns true if the device supports VK_EXT_index_type_uint8.
+ bool IsExtIndexTypeUint8Supported() const {
+ return ext_index_type_uint8;
}
/// Checks if the physical device is suitable.
@@ -123,22 +143,28 @@ private:
FormatType format_type) const;
/// Returns the device properties for Vulkan formats.
- static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
+ static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
- const vk::PhysicalDevice physical; ///< Physical device.
- vk::DispatchLoaderDynamic dld; ///< Device function pointers.
- UniqueDevice logical; ///< Logical device.
- vk::Queue graphics_queue; ///< Main graphics queue.
- vk::Queue present_queue; ///< Main present queue.
- u32 graphics_family{}; ///< Main graphics queue family index.
- u32 present_family{}; ///< Main present queue family index.
- vk::PhysicalDeviceType device_type; ///< Physical device type.
- u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
- u64 max_storage_buffer_range{}; ///< Max storage buffer size.
- bool is_optimal_astc_supported{}; ///< Support for native ASTC.
- bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout.
- std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
+ const vk::PhysicalDevice physical; ///< Physical device.
+ vk::DispatchLoaderDynamic dld; ///< Device function pointers.
+ UniqueDevice logical; ///< Logical device.
+ vk::Queue graphics_queue; ///< Main graphics queue.
+ vk::Queue present_queue; ///< Main present queue.
+ u32 graphics_family{}; ///< Main graphics queue family index.
+ u32 present_family{}; ///< Main present queue family index.
+ vk::PhysicalDeviceType device_type; ///< Physical device type.
+ vk::DriverIdKHR driver_id{}; ///< Driver ID.
+ u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment.
+ u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment.
+ u64 max_storage_buffer_range{}; ///< Max storage buffer size.
+ bool is_optimal_astc_supported{}; ///< Support for native ASTC.
+ bool is_float16_supported{}; ///< Support for float16 arithmetics.
+ bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
+ bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
+ bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties.
+ std::unordered_map<vk::Format, vk::FormatProperties>
+ format_properties; ///< Format properties dictionary.
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a35b45c9c..f7fbbb6e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -370,8 +370,8 @@ private:
u32 binding = const_buffers_base_binding;
for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry;
- const Id type =
- device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
+ const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
+ : t_cbuf_std140_ubo;
const Id id = OpVariable(type, spv::StorageClass::Uniform);
AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
@@ -565,7 +565,7 @@ private:
const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
Id pointer{};
- if (device.IsExtScalarBlockLayoutSupported()) {
+ if (device.IsKhrUniformBufferStandardLayoutSupported()) {
const Id buffer_offset = Emit(OpShiftRightLogical(
t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
pointer = Emit(
@@ -944,6 +944,41 @@ private:
return {};
}
+ Id AtomicImageAdd(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageMin(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageMax(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageAnd(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageOr(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageXor(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id AtomicImageExchange(Operation operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
@@ -1092,6 +1127,46 @@ private:
return {};
}
+ Id ShuffleIndexed(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleUp(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleDown(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleButterfly(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleIndexed(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleUp(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleDown(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleButterfly(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
const std::string& name) {
const Id id = OpVariable(type, storage);
@@ -1366,6 +1441,13 @@ private:
&SPIRVDecompiler::TexelFetch,
&SPIRVDecompiler::ImageStore,
+ &SPIRVDecompiler::AtomicImageAdd,
+ &SPIRVDecompiler::AtomicImageMin,
+ &SPIRVDecompiler::AtomicImageMax,
+ &SPIRVDecompiler::AtomicImageAnd,
+ &SPIRVDecompiler::AtomicImageOr,
+ &SPIRVDecompiler::AtomicImageXor,
+ &SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect,
@@ -1389,6 +1471,16 @@ private:
&SPIRVDecompiler::VoteAll,
&SPIRVDecompiler::VoteAny,
&SPIRVDecompiler::VoteEqual,
+
+ &SPIRVDecompiler::ShuffleIndexed,
+ &SPIRVDecompiler::ShuffleUp,
+ &SPIRVDecompiler::ShuffleDown,
+ &SPIRVDecompiler::ShuffleButterfly,
+
+ &SPIRVDecompiler::InRangeShuffleIndexed,
+ &SPIRVDecompiler::InRangeShuffleUp,
+ &SPIRVDecompiler::InRangeShuffleDown,
+ &SPIRVDecompiler::InRangeShuffleButterfly,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 77151a24b..d54fb88c9 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::SUST: {
UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
- UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
@@ -61,56 +60,105 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
}
const auto type{instr.sust.image_type};
- const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
- : GetBindlessImage(instr.gpr39, type)};
+ auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
+ : GetBindlessImage(instr.gpr39, type)};
+ image.MarkWrite();
+
MetaImage meta{image, values};
- const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
- bb.push_back(store);
+ bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords)));
+ break;
+ }
+ case OpCode::Id::SUATOM: {
+ UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
+
+ Node value = GetRegister(instr.gpr0);
+
+ std::vector<Node> coords;
+ const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
+ for (std::size_t i = 0; i < num_coords; ++i) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + i));
+ }
+
+ const OperationCode operation_code = [instr] {
+ switch (instr.suatom_d.operation) {
+ case Tegra::Shader::ImageAtomicOperation::Add:
+ return OperationCode::AtomicImageAdd;
+ case Tegra::Shader::ImageAtomicOperation::Min:
+ return OperationCode::AtomicImageMin;
+ case Tegra::Shader::ImageAtomicOperation::Max:
+ return OperationCode::AtomicImageMax;
+ case Tegra::Shader::ImageAtomicOperation::And:
+ return OperationCode::AtomicImageAnd;
+ case Tegra::Shader::ImageAtomicOperation::Or:
+ return OperationCode::AtomicImageOr;
+ case Tegra::Shader::ImageAtomicOperation::Xor:
+ return OperationCode::AtomicImageXor;
+ case Tegra::Shader::ImageAtomicOperation::Exch:
+ return OperationCode::AtomicImageExchange;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation={}",
+ static_cast<u32>(instr.suatom_d.operation.Value()));
+ return OperationCode::AtomicImageAdd;
+ }
+ }();
+
+ const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)};
+ MetaImage meta{image, {std::move(value)}};
+ SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords)));
break;
}
default:
- UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
+ UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
}
return pc;
}
-const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
+Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
const auto offset{static_cast<std::size_t>(image.index.Value())};
-
- // If this image has already been used, return the existing mapping.
- const auto itr{std::find_if(used_images.begin(), used_images.end(),
- [=](const Image& entry) { return entry.GetOffset() == offset; })};
- if (itr != used_images.end()) {
- ASSERT(itr->GetType() == type);
- return *itr;
+ if (const auto image = TryUseExistingImage(offset, type, size)) {
+ return *image;
}
- // Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
- const Image entry{offset, next_index, type};
- return *used_images.emplace(entry).first;
+ return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second;
}
-const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
- Tegra::Shader::ImageType type) {
+Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
const Node image_register{GetRegister(reg)};
const auto [base_image, cbuf_index, cbuf_offset]{
TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
- // If this image has already been used, return the existing mapping.
- const auto itr{std::find_if(used_images.begin(), used_images.end(),
- [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
- if (itr != used_images.end()) {
- ASSERT(itr->GetType() == type);
- return *itr;
+ if (const auto image = TryUseExistingImage(cbuf_key, type, size)) {
+ return *image;
}
- // Otherwise create a new mapping for this image.
const std::size_t next_index{used_images.size()};
- const Image entry{cbuf_index, cbuf_offset, next_index, type};
- return *used_images.emplace(entry).first;
+ return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size})
+ .first->second;
+}
+
+Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size) {
+ auto it = used_images.find(offset);
+ if (it == used_images.end()) {
+ return nullptr;
+ }
+ auto& image = it->second;
+ ASSERT(image.GetType() == type);
+
+ if (size) {
+ // We know the size, if it's known it has to be the same as before, otherwise we can set it.
+ if (image.IsSizeKnown()) {
+ ASSERT(image.GetSize() == size);
+ } else {
+ image.SetSize(*size);
+ }
+ }
+ return &image;
}
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ed108bea8..7923d4d69 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -35,7 +35,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
return 1;
}
}
-} // namespace
+} // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
@@ -106,16 +106,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
}
break;
}
- case OpCode::Id::LD_L: {
- LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}",
- static_cast<u64>(instr.ld_l.unknown.Value()));
-
- const auto GetLmem = [&](s32 offset) {
+ case OpCode::Id::LD_L:
+ LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
+ [[fallthrough]];
+ case OpCode::Id::LD_S: {
+ const auto GetMemory = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
immediate_offset);
- return GetLocalMemory(address);
+ return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address)
+ : GetLocalMemory(address);
};
switch (instr.ldst_sl.type.Value()) {
@@ -135,14 +136,16 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return 0;
}
}();
- for (u32 i = 0; i < count; ++i)
- SetTemporary(bb, i, GetLmem(i * 4));
- for (u32 i = 0; i < count; ++i)
+ for (u32 i = 0; i < count; ++i) {
+ SetTemporary(bb, i, GetMemory(i * 4));
+ }
+ for (u32 i = 0; i < count; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+ }
break;
}
default:
- UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
+ UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
@@ -209,27 +212,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
- case OpCode::Id::ST_L: {
+ case OpCode::Id::ST_L:
LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}",
static_cast<u64>(instr.st_l.cache_management.Value()));
-
- const auto GetLmemAddr = [&](s32 offset) {
+ [[fallthrough]];
+ case OpCode::Id::ST_S: {
+ const auto GetAddress = [&](s32 offset) {
ASSERT(offset % 4 == 0);
const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
};
+ const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L
+ ? &ShaderIR::SetLocalMemory
+ : &ShaderIR::SetSharedMemory;
+
switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bits128:
- SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
- SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
+ (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
+ (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
+ [[fallthrough]];
case Tegra::Shader::StoreType::Bits64:
- SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
+ (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
+ [[fallthrough]];
case Tegra::Shader::StoreType::Bits32:
- SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
+ (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break;
default:
- UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
+ UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value()));
}
break;
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 2ac16eeb0..f6ee68a54 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -17,8 +17,8 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- const Node op_a = GetRegister(instr.gpr8);
- const Node op_b = [&]() {
+ Node op_a = GetRegister(instr.gpr8);
+ Node op_b = [&]() {
if (instr.is_b_imm) {
return Immediate(instr.alu.GetSignedImm20_20());
} else if (instr.is_b_gpr) {
@@ -32,16 +32,23 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
case OpCode::Id::SHR_C:
case OpCode::Id::SHR_R:
case OpCode::Id::SHR_IMM: {
- const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
- instr.shift.is_signed, PRECISE, op_a, op_b);
+ if (instr.shr.wrap) {
+ op_b = Operation(OperationCode::UBitwiseAnd, std::move(op_b), Immediate(0x1f));
+ } else {
+ op_b = Operation(OperationCode::IMax, std::move(op_b), Immediate(0));
+ op_b = Operation(OperationCode::IMin, std::move(op_b), Immediate(31));
+ }
+
+ Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
+ std::move(op_a), std::move(op_b));
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::SHL_C:
case OpCode::Id::SHL_R:
case OpCode::Id::SHL_IMM: {
- const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
+ const Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
SetRegister(bb, instr.gpr0, value);
break;
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
+using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
SetPredicate(bb, instr.vote.dest_pred, vote);
break;
}
+ case OpCode::Id::SHFL: {
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node width = [&] {
+ // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
+ // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
+ // different parameters that don't properly map to GLSL's interface, but it should work
+ // for cases emitted by Nvidia's compiler.
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(
+ OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
+ Immediate(8));
+ } else {
+ return Operation(OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, Immediate(0x201F),
+ Operation(OperationCode::INegate, std::move(mask))),
+ Immediate(8));
+ }
+ }();
+
+ const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ switch (instr.shfl.operation) {
+ case ShuffleOperation::Idx:
+ return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
+ case ShuffleOperation::Up:
+ return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ case ShuffleOperation::Down:
+ return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ case ShuffleOperation::Bfly:
+ return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ }
+ UNREACHABLE_MSG("Invalid SHFL operation: {}",
+ static_cast<u64>(instr.shfl.operation.Value()));
+ return {};
+ }();
+
+ // Setting the predicate before the register is intentional to avoid overwriting.
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+ SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ SetRegister(
+ bb, instr.gpr0,
+ Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5db9313c4..abf2cb1ab 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
+#include <optional>
#include <string>
#include <tuple>
#include <utility>
@@ -148,7 +149,14 @@ enum class OperationCode {
TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4
TexelFetch, /// (MetaTexture, int[N], int) -> float4
- ImageStore, /// (MetaImage, float[N] coords) -> void
+ ImageStore, /// (MetaImage, int[N] values) -> void
+ AtomicImageAdd, /// (MetaImage, int[N] coords) -> void
+ AtomicImageMin, /// (MetaImage, int[N] coords) -> void
+ AtomicImageMax, /// (MetaImage, int[N] coords) -> void
+ AtomicImageAnd, /// (MetaImage, int[N] coords) -> void
+ AtomicImageOr, /// (MetaImage, int[N] coords) -> void
+ AtomicImageXor, /// (MetaImage, int[N] coords) -> void
+ AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void
@@ -173,6 +181,16 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
+ ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
+ ShuffleUp, /// (uint value, uint index, uint width) -> uint
+ ShuffleDown, /// (uint value, uint index, uint width) -> uint
+ ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
+
+ InRangeShuffleIndexed, /// (uint index, uint width) -> bool
+ InRangeShuffleUp, /// (uint index, uint width) -> bool
+ InRangeShuffleDown, /// (uint index, uint width) -> bool
+ InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+
Amount,
};
@@ -198,12 +216,13 @@ class PredicateNode;
class AbufNode;
class CbufNode;
class LmemNode;
+class SmemNode;
class GmemNode;
class CommentNode;
using NodeData =
std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
- PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
+ PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>;
@@ -273,46 +292,85 @@ private:
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
-class Image {
+class Image final {
public:
- explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
- : offset{offset}, index{index}, type{type}, is_bindless{false} {}
+ constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
+ : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {}
- explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
- Tegra::Shader::ImageType type)
+ constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+ Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
- is_bindless{true} {}
+ is_bindless{true}, size{size} {}
- explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
- bool is_bindless)
- : offset{offset}, index{index}, type{type}, is_bindless{is_bindless} {}
+ constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
+ bool is_bindless, bool is_written, bool is_read,
+ std::optional<Tegra::Shader::ImageAtomicSize> size)
+ : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
+ is_written{is_written}, is_read{is_read}, size{size} {}
- std::size_t GetOffset() const {
+ void MarkWrite() {
+ is_written = true;
+ }
+
+ void MarkRead() {
+ is_read = true;
+ }
+
+ void SetSize(Tegra::Shader::ImageAtomicSize size_) {
+ size = size_;
+ }
+
+ constexpr std::size_t GetOffset() const {
return offset;
}
- std::size_t GetIndex() const {
+ constexpr std::size_t GetIndex() const {
return index;
}
- Tegra::Shader::ImageType GetType() const {
+ constexpr Tegra::Shader::ImageType GetType() const {
return type;
}
- bool IsBindless() const {
+ constexpr bool IsBindless() const {
return is_bindless;
}
- bool operator<(const Image& rhs) const {
- return std::tie(offset, index, type, is_bindless) <
- std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
+ constexpr bool IsWritten() const {
+ return is_written;
+ }
+
+ constexpr bool IsRead() const {
+ return is_read;
+ }
+
+ constexpr std::pair<u32, u32> GetBindlessCBuf() const {
+ return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+ }
+
+ constexpr bool IsSizeKnown() const {
+ return size.has_value();
+ }
+
+ constexpr Tegra::Shader::ImageAtomicSize GetSize() const {
+ return size.value();
+ }
+
+ constexpr bool operator<(const Image& rhs) const {
+ return std::tie(offset, index, type, size, is_bindless) <
+ std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless);
}
private:
- std::size_t offset{};
+ u64 offset{};
std::size_t index{};
Tegra::Shader::ImageType type{};
bool is_bindless{};
+ bool is_written{};
+ bool is_read{};
+ std::optional<Tegra::Shader::ImageAtomicSize> size{};
};
struct GlobalMemoryBase {
@@ -536,6 +594,19 @@ private:
Node address;
};
+/// Shared memory node
+class SmemNode final {
+public:
+ explicit SmemNode(Node address) : address{std::move(address)} {}
+
+ const Node& GetAddress() const {
+ return address;
+ }
+
+private:
+ Node address;
+};
+
/// Global memory node
class GmemNode final {
public:
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 1e5c7f660..bbbab0bca 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -137,6 +137,10 @@ Node ShaderIR::GetLocalMemory(Node address) {
return MakeNode<LmemNode>(std::move(address));
}
+Node ShaderIR::GetSharedMemory(Node address) {
+ return MakeNode<SmemNode>(std::move(address));
+}
+
Node ShaderIR::GetTemporary(u32 id) {
return GetRegister(Register::ZeroIndex + 1 + id);
}
@@ -378,6 +382,11 @@ void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value)));
}
+void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
+ bb.push_back(
+ Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value)));
+}
+
void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value));
}
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index bcc9b79b6..6aed9bb84 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -95,7 +95,7 @@ public:
return used_samplers;
}
- const std::set<Image>& GetImages() const {
+ const std::map<u64, Image>& GetImages() const {
return used_images;
}
@@ -208,6 +208,8 @@ private:
Node GetInternalFlag(InternalFlag flag, bool negated = false);
/// Generates a node representing a local memory address
Node GetLocalMemory(Node address);
+ /// Generates a node representing a shared memory address
+ Node GetSharedMemory(Node address);
/// Generates a temporary, internally it uses a post-RZ register
Node GetTemporary(u32 id);
@@ -217,8 +219,10 @@ private:
void SetPredicate(NodeBlock& bb, u64 dest, Node src);
/// Sets an internal flag. src value must be a bool-evaluated node
void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
- /// Sets a local memory address. address and value must be a number-evaluated node
+ /// Sets a local memory address with a value.
void SetLocalMemory(NodeBlock& bb, Node address, Node value);
+ /// Sets a shared memory address with a value.
+ void SetSharedMemory(NodeBlock& bb, Node address, Node value);
/// Sets a temporary. Internally it uses a post-RZ register
void SetTemporary(NodeBlock& bb, u32 id, Node value);
@@ -272,10 +276,16 @@ private:
bool is_shadow);
/// Accesses an image.
- const Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
+ Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size = {});
/// Access a bindless image sampler.
- const Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
+ Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size = {});
+
+ /// Tries to access an existing image, updating it's state as needed
+ Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type,
+ std::optional<Tegra::Shader::ImageAtomicSize> size);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -356,7 +366,7 @@ private:
std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
std::map<u32, ConstBuffer> used_cbufs;
std::set<Sampler> used_samplers;
- std::set<Image> used_images;
+ std::map<u64, Image> used_images;
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
bool uses_layer{};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 4ceb219be..53d0142cb 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -513,6 +513,26 @@ bool IsPixelFormatASTC(PixelFormat format) {
}
}
+bool IsPixelFormatSRGB(PixelFormat format) {
+ switch (format) {
+ case PixelFormat::RGBA8_SRGB:
+ case PixelFormat::BGRA8_SRGB:
+ case PixelFormat::DXT1_SRGB:
+ case PixelFormat::DXT23_SRGB:
+ case PixelFormat::DXT45_SRGB:
+ case PixelFormat::BC7U_SRGB:
+ case PixelFormat::ASTC_2D_4X4_SRGB:
+ case PixelFormat::ASTC_2D_8X8_SRGB:
+ case PixelFormat::ASTC_2D_8X5_SRGB:
+ case PixelFormat::ASTC_2D_5X4_SRGB:
+ case PixelFormat::ASTC_2D_5X5_SRGB:
+ case PixelFormat::ASTC_2D_10X8_SRGB:
+ return true;
+ default:
+ return false;
+ }
+}
+
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)};
}
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 83f31c12c..19268b7cd 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -547,6 +547,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format);
bool IsPixelFormatASTC(PixelFormat format);
+bool IsPixelFormatSRGB(PixelFormat format);
+
std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
/// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index bcce8d863..5e497e49f 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -195,18 +195,18 @@ public:
virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;
- void MarkAsModified(const bool is_modified_, const u64 tick) {
+ void MarkAsModified(bool is_modified_, u64 tick) {
is_modified = is_modified_ || is_target;
modification_tick = tick;
}
- void MarkAsRenderTarget(const bool is_target, const u32 index) {
- this->is_target = is_target;
- this->index = index;
+ void MarkAsRenderTarget(bool is_target_, u32 index_) {
+ is_target = is_target_;
+ index = index_;
}
- void MarkAsPicked(const bool is_picked) {
- this->is_picked = is_picked;
+ void MarkAsPicked(bool is_picked_) {
+ is_picked = is_picked_;
}
bool IsModified() const {
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index fd5472451..1e4d3fb79 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -24,55 +24,62 @@ using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceTargetFromTextureType;
using VideoCore::Surface::SurfaceType;
-SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
+namespace {
+
+SurfaceTarget TextureTypeToSurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
switch (type) {
- case Tegra::Shader::TextureType::Texture1D: {
- if (is_array)
- return SurfaceTarget::Texture1DArray;
- else
- return SurfaceTarget::Texture1D;
- }
- case Tegra::Shader::TextureType::Texture2D: {
- if (is_array)
- return SurfaceTarget::Texture2DArray;
- else
- return SurfaceTarget::Texture2D;
- }
- case Tegra::Shader::TextureType::Texture3D: {
+ case Tegra::Shader::TextureType::Texture1D:
+ return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
+ case Tegra::Shader::TextureType::Texture2D:
+ return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ case Tegra::Shader::TextureType::Texture3D:
ASSERT(!is_array);
return SurfaceTarget::Texture3D;
- }
- case Tegra::Shader::TextureType::TextureCube: {
- if (is_array)
- return SurfaceTarget::TextureCubeArray;
- else
- return SurfaceTarget::TextureCubemap;
- }
- default: {
+ case Tegra::Shader::TextureType::TextureCube:
+ return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
+ default:
UNREACHABLE();
return SurfaceTarget::Texture2D;
}
+}
+
+SurfaceTarget ImageTypeToSurfaceTarget(Tegra::Shader::ImageType type) {
+ switch (type) {
+ case Tegra::Shader::ImageType::Texture1D:
+ return SurfaceTarget::Texture1D;
+ case Tegra::Shader::ImageType::TextureBuffer:
+ return SurfaceTarget::TextureBuffer;
+ case Tegra::Shader::ImageType::Texture1DArray:
+ return SurfaceTarget::Texture1DArray;
+ case Tegra::Shader::ImageType::Texture2D:
+ return SurfaceTarget::Texture2D;
+ case Tegra::Shader::ImageType::Texture2DArray:
+ return SurfaceTarget::Texture2DArray;
+ case Tegra::Shader::ImageType::Texture3D:
+ return SurfaceTarget::Texture3D;
+ default:
+ UNREACHABLE();
+ return SurfaceTarget::Texture2D;
}
}
-namespace {
constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile);
}
+
} // Anonymous namespace
-SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
- const Tegra::Texture::FullTextureInfo& config,
+SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry) {
SurfaceParams params;
- params.is_tiled = config.tic.IsTiled();
- params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
- params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0,
- params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0,
- params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0,
- params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
- params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
- params.srgb_conversion);
+ params.is_tiled = tic.IsTiled();
+ params.srgb_conversion = tic.IsSrgbConversionEnabled();
+ params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
+ params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
+ params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+ params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
+ params.pixel_format =
+ PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
params.type = GetFormatType(params.pixel_format);
if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
@@ -92,31 +99,72 @@ SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
}
params.type = GetFormatType(params.pixel_format);
}
- params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
+ params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
params.type = GetFormatType(params.pixel_format);
// TODO: on 1DBuffer we should use the tic info.
- if (!config.tic.IsBuffer()) {
- params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray());
- params.width = config.tic.Width();
- params.height = config.tic.Height();
- params.depth = config.tic.Depth();
- params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
+ if (tic.IsBuffer()) {
+ params.target = SurfaceTarget::TextureBuffer;
+ params.width = tic.Width();
+ params.pitch = params.width * params.GetBytesPerPixel();
+ params.height = 1;
+ params.depth = 1;
+ params.num_levels = 1;
+ params.emulated_levels = 1;
+ params.is_layered = false;
+ } else {
+ params.target = TextureTypeToSurfaceTarget(entry.GetType(), entry.IsArray());
+ params.width = tic.Width();
+ params.height = tic.Height();
+ params.depth = tic.Depth();
+ params.pitch = params.is_tiled ? 0 : tic.Pitch();
if (params.target == SurfaceTarget::TextureCubemap ||
params.target == SurfaceTarget::TextureCubeArray) {
params.depth *= 6;
}
- params.num_levels = config.tic.max_mip_level + 1;
+ params.num_levels = tic.max_mip_level + 1;
params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
params.is_layered = params.IsLayered();
- } else {
+ }
+ return params;
+}
+
+SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic,
+ const VideoCommon::Shader::Image& entry) {
+ SurfaceParams params;
+ params.is_tiled = tic.IsTiled();
+ params.srgb_conversion = tic.IsSrgbConversionEnabled();
+ params.block_width = params.is_tiled ? tic.BlockWidth() : 0,
+ params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
+ params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
+ params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
+ params.pixel_format =
+ PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
+ params.type = GetFormatType(params.pixel_format);
+ params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
+ params.type = GetFormatType(params.pixel_format);
+ params.target = ImageTypeToSurfaceTarget(entry.GetType());
+ // TODO: on 1DBuffer we should use the tic info.
+ if (tic.IsBuffer()) {
params.target = SurfaceTarget::TextureBuffer;
- params.width = config.tic.Width();
+ params.width = tic.Width();
params.pitch = params.width * params.GetBytesPerPixel();
params.height = 1;
params.depth = 1;
params.num_levels = 1;
params.emulated_levels = 1;
params.is_layered = false;
+ } else {
+ params.width = tic.Width();
+ params.height = tic.Height();
+ params.depth = tic.Depth();
+ params.pitch = params.is_tiled ? 0 : tic.Pitch();
+ if (params.target == SurfaceTarget::TextureCubemap ||
+ params.target == SurfaceTarget::TextureCubeArray) {
+ params.depth *= 6;
+ }
+ params.num_levels = tic.max_mip_level + 1;
+ params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
+ params.is_layered = params.IsLayered();
}
return params;
}
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index e7ef66ee2..c58e7f8a4 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -4,8 +4,6 @@
#pragma once
-#include <map>
-
#include "common/alignment.h"
#include "common/bit_util.h"
#include "common/cityhash.h"
@@ -23,10 +21,13 @@ using VideoCore::Surface::SurfaceCompression;
class SurfaceParams {
public:
/// Creates SurfaceCachedParams from a texture configuration.
- static SurfaceParams CreateForTexture(Core::System& system,
- const Tegra::Texture::FullTextureInfo& config,
+ static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry);
+ /// Creates SurfaceCachedParams from an image configuration.
+ static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic,
+ const VideoCommon::Shader::Image& entry);
+
/// Creates SurfaceCachedParams for a depth buffer configuration.
static SurfaceParams CreateForDepthBuffer(
Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp
index 467696a4c..57a1f5803 100644
--- a/src/video_core/texture_cache/surface_view.cpp
+++ b/src/video_core/texture_cache/surface_view.cpp
@@ -10,7 +10,7 @@
namespace VideoCommon {
std::size_t ViewParams::Hash() const {
- return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^
+ return static_cast<std::size_t>(base_layer) ^ (static_cast<std::size_t>(num_layers) << 16) ^
(static_cast<std::size_t>(base_level) << 24) ^
(static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36);
}
diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h
index 04ca5639b..b17fd11a9 100644
--- a/src/video_core/texture_cache/surface_view.h
+++ b/src/video_core/texture_cache/surface_view.h
@@ -13,8 +13,8 @@
namespace VideoCommon {
struct ViewParams {
- ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers,
- u32 base_level, u32 num_levels)
+ constexpr explicit ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer,
+ u32 num_layers, u32 base_level, u32 num_levels)
: target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
num_levels{num_levels} {}
@@ -22,12 +22,6 @@ struct ViewParams {
bool operator==(const ViewParams& rhs) const;
- VideoCore::Surface::SurfaceTarget target{};
- u32 base_layer{};
- u32 num_layers{};
- u32 base_level{};
- u32 num_levels{};
-
bool IsLayered() const {
switch (target) {
case VideoCore::Surface::SurfaceTarget::Texture1DArray:
@@ -39,13 +33,19 @@ struct ViewParams {
return false;
}
}
+
+ VideoCore::Surface::SurfaceTarget target{};
+ u32 base_layer{};
+ u32 num_layers{};
+ u32 base_level{};
+ u32 num_levels{};
};
class ViewBase {
public:
- ViewBase(const ViewParams& params) : params{params} {}
+ constexpr explicit ViewBase(const ViewParams& params) : params{params} {}
- const ViewParams& GetViewParams() const {
+ constexpr const ViewParams& GetViewParams() const {
return params;
}
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 2ec0203d1..877c6635d 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -89,14 +89,29 @@ public:
}
}
- TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
+ TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
const VideoCommon::Shader::Sampler& entry) {
std::lock_guard lock{mutex};
- const auto gpu_addr{config.tic.Address()};
+ const auto gpu_addr{tic.Address()};
if (!gpu_addr) {
return {};
}
- const auto params{SurfaceParams::CreateForTexture(system, config, entry)};
+ const auto params{SurfaceParams::CreateForTexture(tic, entry)};
+ const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
+ if (guard_samplers) {
+ sampled_textures.push_back(surface);
+ }
+ return view;
+ }
+
+ TView GetImageSurface(const Tegra::Texture::TICEntry& tic,
+ const VideoCommon::Shader::Image& entry) {
+ std::lock_guard lock{mutex};
+ const auto gpu_addr{tic.Address()};
+ if (!gpu_addr) {
+ return {};
+ }
+ const auto params{SurfaceParams::CreateForImage(tic, entry)};
const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
if (guard_samplers) {
sampled_textures.push_back(surface);