diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/hle/service/acc/acc.cpp | 124 | ||||
-rw-r--r-- | src/core/hle/service/acc/acc.h | 1 | ||||
-rw-r--r-- | src/core/hle/service/acc/acc_su.cpp | 2 | ||||
-rw-r--r-- | src/core/hle/service/acc/profile_manager.cpp | 11 | ||||
-rw-r--r-- | src/core/hle/service/acc/profile_manager.h | 2 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 12 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 8 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 32 | ||||
-rw-r--r-- | src/video_core/macro_interpreter.cpp | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 145 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 20 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_device.cpp | 301 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_device.h | 62 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 48 | ||||
-rw-r--r-- | src/video_core/shader/decode/image.cpp | 92 | ||||
-rw-r--r-- | src/video_core/shader/node.h | 57 | ||||
-rw-r--r-- | src/video_core/shader/shader_ir.h | 10 | ||||
-rw-r--r-- | src/video_core/surface.cpp | 20 | ||||
-rw-r--r-- | src/video_core/surface.h | 2 |
19 files changed, 759 insertions, 194 deletions
diff --git a/src/core/hle/service/acc/acc.cpp b/src/core/hle/service/acc/acc.cpp index c01ee3eda..a7c55e116 100644 --- a/src/core/hle/service/acc/acc.cpp +++ b/src/core/hle/service/acc/acc.cpp @@ -31,6 +31,9 @@ namespace Service::Account { +constexpr ResultCode ERR_INVALID_BUFFER_SIZE{ErrorModule::Account, 30}; +constexpr ResultCode ERR_FAILED_SAVE_DATA{ErrorModule::Account, 100}; + static std::string GetImagePath(Common::UUID uuid) { return FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + "/system/save/8000000000000010/su/avators/" + uuid.FormatSwitch() + ".jpg"; @@ -41,20 +44,31 @@ static constexpr u32 SanitizeJPEGSize(std::size_t size) { return static_cast<u32>(std::min(size, max_jpeg_image_size)); } -class IProfile final : public ServiceFramework<IProfile> { +class IProfileCommon : public ServiceFramework<IProfileCommon> { public: - explicit IProfile(Common::UUID user_id, ProfileManager& profile_manager) - : ServiceFramework("IProfile"), profile_manager(profile_manager), user_id(user_id) { + explicit IProfileCommon(const char* name, bool editor_commands, Common::UUID user_id, + ProfileManager& profile_manager) + : ServiceFramework(name), profile_manager(profile_manager), user_id(user_id) { static const FunctionInfo functions[] = { - {0, &IProfile::Get, "Get"}, - {1, &IProfile::GetBase, "GetBase"}, - {10, &IProfile::GetImageSize, "GetImageSize"}, - {11, &IProfile::LoadImage, "LoadImage"}, + {0, &IProfileCommon::Get, "Get"}, + {1, &IProfileCommon::GetBase, "GetBase"}, + {10, &IProfileCommon::GetImageSize, "GetImageSize"}, + {11, &IProfileCommon::LoadImage, "LoadImage"}, }; + RegisterHandlers(functions); + + if (editor_commands) { + static const FunctionInfo editor_functions[] = { + {100, &IProfileCommon::Store, "Store"}, + {101, &IProfileCommon::StoreWithImage, "StoreWithImage"}, + }; + + RegisterHandlers(editor_functions); + } } -private: +protected: void Get(Kernel::HLERequestContext& ctx) { LOG_INFO(Service_ACC, "called user_id={}", user_id.Format()); ProfileBase profile_base{}; @@ -127,10 +141,91 @@ private: } } - const ProfileManager& profile_manager; + void Store(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto base = rp.PopRaw<ProfileBase>(); + + const auto user_data = ctx.ReadBuffer(); + + LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}", + Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(base.username.data()), base.username.size()), + base.timestamp, base.user_uuid.Format()); + + if (user_data.size() < sizeof(ProfileData)) { + LOG_ERROR(Service_ACC, "ProfileData buffer too small!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_INVALID_BUFFER_SIZE); + return; + } + + ProfileData data; + std::memcpy(&data, user_data.data(), sizeof(ProfileData)); + + if (!profile_manager.SetProfileBaseAndData(user_id, base, data)) { + LOG_ERROR(Service_ACC, "Failed to update profile data and base!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_FAILED_SAVE_DATA); + return; + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + void StoreWithImage(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + const auto base = rp.PopRaw<ProfileBase>(); + + const auto user_data = ctx.ReadBuffer(); + const auto image_data = ctx.ReadBuffer(1); + + LOG_DEBUG(Service_ACC, "called, username='{}', timestamp={:016X}, uuid={}", + Common::StringFromFixedZeroTerminatedBuffer( + reinterpret_cast<const char*>(base.username.data()), base.username.size()), + base.timestamp, base.user_uuid.Format()); + + if (user_data.size() < sizeof(ProfileData)) { + LOG_ERROR(Service_ACC, "ProfileData buffer too small!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_INVALID_BUFFER_SIZE); + return; + } + + ProfileData data; + std::memcpy(&data, user_data.data(), sizeof(ProfileData)); + + FileUtil::IOFile image(GetImagePath(user_id), "wb"); + + if (!image.IsOpen() || !image.Resize(image_data.size()) || + image.WriteBytes(image_data.data(), image_data.size()) != image_data.size() || + !profile_manager.SetProfileBaseAndData(user_id, base, data)) { + LOG_ERROR(Service_ACC, "Failed to update profile data, base, and image!"); + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ERR_FAILED_SAVE_DATA); + return; + } + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(RESULT_SUCCESS); + } + + ProfileManager& profile_manager; Common::UUID user_id; ///< The user id this profile refers to. }; +class IProfile final : public IProfileCommon { +public: + IProfile(Common::UUID user_id, ProfileManager& profile_manager) + : IProfileCommon("IProfile", false, user_id, profile_manager) {} +}; + +class IProfileEditor final : public IProfileCommon { +public: + IProfileEditor(Common::UUID user_id, ProfileManager& profile_manager) + : IProfileCommon("IProfileEditor", true, user_id, profile_manager) {} +}; + class IManagerForApplication final : public ServiceFramework<IManagerForApplication> { public: IManagerForApplication() : ServiceFramework("IManagerForApplication") { @@ -322,6 +417,17 @@ void Module::Interface::IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx rb.Push(is_locked); } +void Module::Interface::GetProfileEditor(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + Common::UUID user_id = rp.PopRaw<Common::UUID>(); + + LOG_DEBUG(Service_ACC, "called, user_id={}", user_id.Format()); + + IPC::ResponseBuilder rb{ctx, 2, 0, 1}; + rb.Push(RESULT_SUCCESS); + rb.PushIpcInterface<IProfileEditor>(user_id, *profile_manager); +} + void Module::Interface::TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx) { LOG_DEBUG(Service_ACC, "called"); // A u8 is passed into this function which we can safely ignore. It's to determine if we have diff --git a/src/core/hle/service/acc/acc.h b/src/core/hle/service/acc/acc.h index f651773b7..7a7dc9ec6 100644 --- a/src/core/hle/service/acc/acc.h +++ b/src/core/hle/service/acc/acc.h @@ -32,6 +32,7 @@ public: void IsUserRegistrationRequestPermitted(Kernel::HLERequestContext& ctx); void TrySelectUserWithoutInteraction(Kernel::HLERequestContext& ctx); void IsUserAccountSwitchLocked(Kernel::HLERequestContext& ctx); + void GetProfileEditor(Kernel::HLERequestContext& ctx); private: ResultCode InitializeApplicationInfoBase(u64 process_id); diff --git a/src/core/hle/service/acc/acc_su.cpp b/src/core/hle/service/acc/acc_su.cpp index 1b7ec3ed0..0d1663657 100644 --- a/src/core/hle/service/acc/acc_su.cpp +++ b/src/core/hle/service/acc/acc_su.cpp @@ -41,7 +41,7 @@ ACC_SU::ACC_SU(std::shared_ptr<Module> module, std::shared_ptr<ProfileManager> p {202, nullptr, "CancelUserRegistration"}, {203, nullptr, "DeleteUser"}, {204, nullptr, "SetUserPosition"}, - {205, nullptr, "GetProfileEditor"}, + {205, &ACC_SU::GetProfileEditor, "GetProfileEditor"}, {206, nullptr, "CompleteUserRegistrationForcibly"}, {210, nullptr, "CreateFloatingRegistrationRequest"}, {230, nullptr, "AuthenticateServiceAsync"}, diff --git a/src/core/hle/service/acc/profile_manager.cpp b/src/core/hle/service/acc/profile_manager.cpp index 49aa5908b..8f9986326 100644 --- a/src/core/hle/service/acc/profile_manager.cpp +++ b/src/core/hle/service/acc/profile_manager.cpp @@ -305,6 +305,17 @@ bool ProfileManager::SetProfileBase(UUID uuid, const ProfileBase& profile_new) { return true; } +bool ProfileManager::SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, + const ProfileData& data_new) { + const auto index = GetUserIndex(uuid); + if (index.has_value() && SetProfileBase(uuid, profile_new)) { + profiles[*index].data = data_new; + return true; + } + + return false; +} + void ProfileManager::ParseUserSaveFile() { FileUtil::IOFile save(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir) + ACC_SAVE_AVATORS_BASE_PATH + "profiles.dat", diff --git a/src/core/hle/service/acc/profile_manager.h b/src/core/hle/service/acc/profile_manager.h index fd7abb541..5a6d28925 100644 --- a/src/core/hle/service/acc/profile_manager.h +++ b/src/core/hle/service/acc/profile_manager.h @@ -91,6 +91,8 @@ public: bool RemoveUser(Common::UUID uuid); bool SetProfileBase(Common::UUID uuid, const ProfileBase& profile_new); + bool SetProfileBaseAndData(Common::UUID uuid, const ProfileBase& profile_new, + const ProfileData& data_new); private: void ParseUserSaveFile(); diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 965c4c06b..c7a3c85a0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -332,6 +332,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ProcessMacroBind(method_call.argument); break; } + case MAXWELL3D_REG_INDEX(firmware[4]): { + ProcessFirmwareCall4(); + break; + } case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]): case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]): @@ -422,6 +426,14 @@ void Maxwell3D::ProcessMacroBind(u32 data) { macro_positions[regs.macros.entry++] = data; } +void Maxwell3D::ProcessFirmwareCall4() { + LOG_WARNING(HW_GPU, "(STUBBED) called"); + + // Firmware call 4 is a blob that changes some registers depending on its parameters. + // These registers don't affect emulation and so are stubbed by setting 0xd00 to 1. + regs.reg_array[0xd00] = 1; +} + void Maxwell3D::ProcessQueryGet() { const GPUVAddr sequence_address{regs.query.QueryAddress()}; // Since the sequence address is given as a GPU VAddr, we have to convert it to an application diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f67a5389f..e5ec90717 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1089,7 +1089,9 @@ public: INSERT_PADDING_WORDS(14); } shader_config[MaxShaderProgram]; - INSERT_PADDING_WORDS(0x80); + INSERT_PADDING_WORDS(0x60); + + u32 firmware[0x20]; struct { u32 cb_size; @@ -1319,6 +1321,9 @@ private: /// Handles writes to the macro bind register. void ProcessMacroBind(u32 data); + /// Handles firmware blob 4 + void ProcessFirmwareCall4(); + /// Handles a write to the CLEAR_BUFFERS register. void ProcessClearBuffers(); @@ -1431,6 +1436,7 @@ ASSERT_REG_POSITION(vertex_array[0], 0x700); ASSERT_REG_POSITION(independent_blend, 0x780); ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0); ASSERT_REG_POSITION(shader_config[0], 0x800); +ASSERT_REG_POSITION(firmware, 0x8C0); ASSERT_REG_POSITION(const_buffer, 0x8E0); ASSERT_REG_POSITION(cb_bind[0], 0x904); ASSERT_REG_POSITION(tex_cb_index, 0x982); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index bd8c1ada0..052e6d24e 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -544,6 +544,28 @@ enum class VoteOperation : u64 { Eq = 2, // allThreadsEqualNV }; +enum class ImageAtomicSize : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32 = 3, + S64 = 5, + SD32 = 6, + SD64 = 7, +}; + +enum class ImageAtomicOperation : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, +}; + union Instruction { Instruction& operator=(const Instruction& instr) { value = instr.value; @@ -1392,6 +1414,14 @@ union Instruction { } sust; union { + BitField<28, 1, u64> is_ba; + BitField<51, 3, ImageAtomicSize> size; + BitField<33, 3, ImageType> image_type; + BitField<29, 4, ImageAtomicOperation> operation; + BitField<49, 2, OutOfBoundsStore> out_of_bounds_store; + } suatom_d; + + union { BitField<20, 24, u64> target; BitField<5, 1, u64> constant_buffer; @@ -1543,6 +1573,7 @@ public: TMML_B, // Texture Mip Map Level TMML, // Texture Mip Map Level SUST, // Surface Store + SUATOM, // Surface Atomic Operation EXIT, NOP, IPA, @@ -1826,6 +1857,7 @@ private: INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"), INST("1101111101011---", Id::TMML, Type::Texture, "TMML"), INST("11101011001-----", Id::SUST, Type::Image, "SUST"), + INST("1110101000------", Id::SUATOM, Type::Image, "SUATOM_D"), INST("0101000010110---", Id::NOP, Type::Trivial, "NOP"), INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 4e1cb98db..62afc0d11 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -131,9 +131,7 @@ bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) { // An instruction with the Exit flag will not actually // cause an exit if it's executed inside a delay slot. - // TODO(Blinkhawk): Reversed to always exit. The behavior explained above requires further - // testing on the MME code. - if (opcode.is_exit) { + if (opcode.is_exit && !is_delay_slot) { // Exit has a delay slot, execute the next instruction Step(offset, true); return false; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 6edb2ca38..137b23740 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -706,7 +706,7 @@ private: void DeclareImages() { const auto& images{ir.GetImages()}; for (const auto& [offset, image] : images) { - const std::string image_type = [&]() { + const char* image_type = [&] { switch (image.GetType()) { case Tegra::Shader::ImageType::Texture1D: return "image1D"; @@ -725,6 +725,23 @@ private: return "image1D"; } }(); + + const auto [type_prefix, format] = [&]() -> std::pair<const char*, const char*> { + if (!image.IsSizeKnown()) { + return {"", ""}; + } + switch (image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {"u", "r32ui, "}; + case Tegra::Shader::ImageAtomicSize::S32: + return {"i", "r32i, "}; + default: + UNIMPLEMENTED_MSG("Unimplemented atomic size={}", + static_cast<u32>(image.GetSize())); + return {"", ""}; + } + }(); + std::string qualifier = "coherent volatile"; if (image.IsRead() && !image.IsWritten()) { qualifier += " readonly"; @@ -1180,6 +1197,74 @@ private: return expr; } + std::string BuildIntegerCoordinates(Operation operation) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const std::size_t coords_count{operation.GetOperandsCount()}; + std::string expr = constructors.at(coords_count - 1); + for (std::size_t i = 0; i < coords_count; ++i) { + expr += VisitOperand(operation, i).AsInt(); + if (i + 1 < coords_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + std::string BuildImageValues(Operation operation) { + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + const auto [constructors, type] = [&]() -> std::pair<std::array<const char*, 4>, Type> { + constexpr std::array float_constructors{"float", "vec2", "vec3", "vec4"}; + if (!meta.image.IsSizeKnown()) { + return {float_constructors, Type::Float}; + } + switch (meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return {{"uint", "uvec2", "uvec3", "uvec4"}, Type::Uint}; + case Tegra::Shader::ImageAtomicSize::S32: + return {{"int", "ivec2", "ivec3", "ivec4"}, Type::Uint}; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", + static_cast<u32>(meta.image.GetSize())); + return {float_constructors, Type::Float}; + } + }(); + + const std::size_t values_count{meta.values.size()}; + std::string expr = fmt::format("{}(", constructors.at(values_count - 1)); + for (std::size_t i = 0; i < values_count; ++i) { + expr += Visit(meta.values.at(i)).As(type); + if (i + 1 < values_count) { + expr += ", "; + } + } + expr += ')'; + return expr; + } + + Expression AtomicImage(Operation operation, const char* opname) { + constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; + const auto meta{std::get<MetaImage>(operation.GetMeta())}; + ASSERT(meta.values.size() == 1); + ASSERT(meta.image.IsSizeKnown()); + + const auto type = [&]() { + switch (const auto size = meta.image.GetSize()) { + case Tegra::Shader::ImageAtomicSize::U32: + return Type::Uint; + case Tegra::Shader::ImageAtomicSize::S32: + return Type::Int; + default: + UNIMPLEMENTED_MSG("Unimplemented image size={}", static_cast<u32>(size)); + return Type::Uint; + } + }(); + + return {fmt::format("{}({}, {}, {})", opname, GetImage(meta.image), + BuildIntegerCoordinates(operation), Visit(meta.values[0]).As(type)), + type}; + } + Expression Assign(Operation operation) { const Node& dest = operation[0]; const Node& src = operation[1]; @@ -1694,36 +1779,37 @@ private: } Expression ImageStore(Operation operation) { - constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("}; const auto meta{std::get<MetaImage>(operation.GetMeta())}; + code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), + BuildIntegerCoordinates(operation), BuildImageValues(operation)); + return {}; + } - std::string expr = "imageStore("; - expr += GetImage(meta.image); - expr += ", "; + Expression AtomicImageAdd(Operation operation) { + return AtomicImage(operation, "imageAtomicAdd"); + } - const std::size_t coords_count{operation.GetOperandsCount()}; - expr += constructors.at(coords_count - 1); - for (std::size_t i = 0; i < coords_count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - if (i + 1 < coords_count) { - expr += ", "; - } - } - expr += "), "; + Expression AtomicImageMin(Operation operation) { + return AtomicImage(operation, "imageAtomicMin"); + } - const std::size_t values_count{meta.values.size()}; - UNIMPLEMENTED_IF(values_count != 4); - expr += "vec4("; - for (std::size_t i = 0; i < values_count; ++i) { - expr += Visit(meta.values.at(i)).AsFloat(); - if (i + 1 < values_count) { - expr += ", "; - } - } - expr += "));"; + Expression AtomicImageMax(Operation operation) { + return AtomicImage(operation, "imageAtomicMax"); + } + Expression AtomicImageAnd(Operation operation) { + return AtomicImage(operation, "imageAtomicAnd"); + } - code.AddLine(expr); - return {}; + Expression AtomicImageOr(Operation operation) { + return AtomicImage(operation, "imageAtomicOr"); + } + + Expression AtomicImageXor(Operation operation) { + return AtomicImage(operation, "imageAtomicXor"); + } + + Expression AtomicImageExchange(Operation operation) { + return AtomicImage(operation, "imageAtomicExchange"); } Expression Branch(Operation operation) { @@ -2019,6 +2105,13 @@ private: &GLSLDecompiler::TexelFetch, &GLSLDecompiler::ImageStore, + &GLSLDecompiler::AtomicImageAdd, + &GLSLDecompiler::AtomicImageMin, + &GLSLDecompiler::AtomicImageMax, + &GLSLDecompiler::AtomicImageAnd, + &GLSLDecompiler::AtomicImageOr, + &GLSLDecompiler::AtomicImageXor, + &GLSLDecompiler::AtomicImageExchange, &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 5450feedf..f141c4e3b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -341,16 +341,22 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn u64 index{}; u32 type{}; u8 is_bindless{}; - u8 is_read{}; u8 is_written{}; + u8 is_read{}; + u8 is_size_known{}; + u32 size{}; if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || - !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { + !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || + !LoadObjectFromPrecompiled(is_size_known) || !LoadObjectFromPrecompiled(size)) { return {}; } - entry.entries.images.emplace_back(static_cast<u64>(offset), static_cast<std::size_t>(index), - static_cast<Tegra::Shader::ImageType>(type), - is_bindless != 0, is_written != 0, is_read != 0); + entry.entries.images.emplace_back( + static_cast<std::size_t>(offset), static_cast<std::size_t>(index), + static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, + is_read != 0, + is_size_known ? std::make_optional(static_cast<Tegra::Shader::ImageAtomicSize>(size)) + : std::nullopt); } u32 global_memory_count{}; @@ -429,12 +435,14 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: return false; } for (const auto& image : entries.images) { + const u32 size = image.IsSizeKnown() ? static_cast<u32>(image.GetSize()) : 0U; if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || + !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || - !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0))) { + !SaveObjectToPrecompiled(image.IsSizeKnown()) || !SaveObjectToPrecompiled(size)) { return false; } } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 3b966ddc3..897cbb4e8 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -2,9 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <map> +#include <bitset> #include <optional> #include <set> +#include <string_view> #include <vector> #include "common/assert.h" #include "video_core/renderer_vulkan/declarations.h" @@ -12,13 +13,32 @@ namespace Vulkan { +namespace { + +template <typename T> +void SetNext(void**& next, T& data) { + *next = &data; + next = &data.pNext; +} + +template <typename T> +T GetFeatures(vk::PhysicalDevice physical, vk::DispatchLoaderDynamic dldi) { + vk::PhysicalDeviceFeatures2 features; + T extension_features; + features.pNext = &extension_features; + physical.getFeatures2(&features, dldi); + return extension_features; +} + +} // Anonymous namespace + namespace Alternatives { -constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = { - vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; -constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = { - vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; -constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; +constexpr std::array Depth24UnormS8Uint = {vk::Format::eD32SfloatS8Uint, + vk::Format::eD16UnormS8Uint, vk::Format{}}; +constexpr std::array Depth16UnormS8Uint = {vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, vk::Format{}}; +constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, vk::Format{}}; } // namespace Alternatives @@ -58,16 +78,53 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy VKDevice::~VKDevice() = default; bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { - vk::PhysicalDeviceFeatures device_features; - device_features.vertexPipelineStoresAndAtomics = true; - device_features.independentBlend = true; - device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; - const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector<const char*> extensions = LoadExtensions(dldi); - const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), - 0, nullptr, static_cast<u32>(extensions.size()), - extensions.data(), &device_features); + const std::vector extensions = LoadExtensions(dldi); + + vk::PhysicalDeviceFeatures2 features2; + void** next = &features2.pNext; + auto& features = features2.features; + features.vertexPipelineStoresAndAtomics = true; + features.independentBlend = true; + features.depthClamp = true; + features.samplerAnisotropy = true; + features.largePoints = true; + features.textureCompressionASTC_LDR = is_optimal_astc_supported; + + vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor; + vertex_divisor.vertexAttributeInstanceRateDivisor = true; + vertex_divisor.vertexAttributeInstanceRateZeroDivisor = true; + SetNext(next, vertex_divisor); + + vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; + if (is_float16_supported) { + float16_int8.shaderFloat16 = true; + SetNext(next, float16_int8); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively"); + } + + vk::PhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout; + if (khr_uniform_buffer_standard_layout) { + std430_layout.uniformBufferStandardLayout = true; + SetNext(next, std430_layout); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support packed UBOs"); + } + + vk::PhysicalDeviceIndexTypeUint8FeaturesEXT index_type_uint8; + if (ext_index_type_uint8) { + index_type_uint8.indexTypeUint8 = true; + SetNext(next, index_type_uint8); + } else { + LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes"); + } + + vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(), 0, + nullptr, static_cast<u32>(extensions.size()), extensions.data(), + nullptr); + device_ci.pNext = &features2; + vk::Device dummy_logical; if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); @@ -78,6 +135,17 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan logical = UniqueDevice( dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + if (khr_driver_properties) { + vk::PhysicalDeviceDriverPropertiesKHR driver; + vk::PhysicalDeviceProperties2 properties; + properties.pNext = &driver; + physical.getProperties2(&properties, dld); + driver_id = driver.driverID; + LOG_INFO(Render_Vulkan, "Driver: {} {}", driver.driverName, driver.driverInfo); + } else { + LOG_INFO(Render_Vulkan, "Driver: Unknown"); + } + graphics_queue = logical->getQueue(graphics_family, 0, dld); present_queue = logical->getQueue(present_family, 0, dld); return true; @@ -92,20 +160,19 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, // The wanted format is not supported by hardware, search for alternatives const vk::Format* alternatives = GetFormatAlternatives(wanted_format); if (alternatives == nullptr) { - LOG_CRITICAL(Render_Vulkan, - "Format={} with usage={} and type={} has no defined alternatives and host " - "hardware does not support it", - vk::to_string(wanted_format), vk::to_string(wanted_usage), - static_cast<u32>(format_type)); - UNREACHABLE(); + UNREACHABLE_MSG("Format={} with usage={} and type={} has no defined alternatives and host " + "hardware does not support it", + vk::to_string(wanted_format), vk::to_string(wanted_usage), + static_cast<u32>(format_type)); return wanted_format; } std::size_t i = 0; for (vk::Format alternative = alternatives[0]; alternative != vk::Format{}; alternative = alternatives[++i]) { - if (!IsFormatSupported(alternative, wanted_usage, format_type)) + if (!IsFormatSupported(alternative, wanted_usage, format_type)) { continue; + } LOG_WARNING(Render_Vulkan, "Emulating format={} with alternative format={} with usage={} and type={}", static_cast<u32>(wanted_format), static_cast<u32>(alternative), @@ -114,12 +181,10 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, } // No alternatives found, panic - LOG_CRITICAL(Render_Vulkan, - "Format={} with usage={} and type={} is not supported by the host hardware and " - "doesn't support any of the alternatives", - static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), - static_cast<u32>(format_type)); - UNREACHABLE(); + UNREACHABLE_MSG("Format={} with usage={} and type={} is not supported by the host hardware and " + "doesn't support any of the alternatives", + static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage), + static_cast<u32>(format_type)); return wanted_format; } @@ -132,7 +197,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | vk::FormatFeatureFlagBits::eTransferDst}; - constexpr std::array<vk::Format, 9> astc_formats = { + constexpr std::array astc_formats = { vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, @@ -151,76 +216,120 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag FormatType format_type) const { const auto it = format_properties.find(wanted_format); if (it == format_properties.end()) { - LOG_CRITICAL(Render_Vulkan, "Unimplemented format query={}", vk::to_string(wanted_format)); - UNREACHABLE(); + UNIMPLEMENTED_MSG("Unimplemented format query={}", vk::to_string(wanted_format)); return true; } - const vk::FormatFeatureFlags supported_usage = GetFormatFeatures(it->second, format_type); + const auto supported_usage = GetFormatFeatures(it->second, format_type); return (supported_usage & wanted_usage) == wanted_usage; } bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface) { - bool has_swapchain{}; + LOG_INFO(Render_Vulkan, "{}", physical.getProperties(dldi).deviceName); + bool is_suitable = true; + + constexpr std::array required_extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME, + VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME}; + std::bitset<required_extensions.size()> available_extensions{}; + for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + for (std::size_t i = 0; i < required_extensions.size(); ++i) { + if (available_extensions[i]) { + continue; + } + available_extensions[i] = + required_extensions[i] == std::string_view{prop.extensionName}; + } } - if (!has_swapchain) { - // The device doesn't support creating swapchains. - return false; + if (!available_extensions.all()) { + for (std::size_t i = 0; i < required_extensions.size(); ++i) { + if (available_extensions[i]) { + continue; + } + LOG_INFO(Render_Vulkan, "Missing required extension: {}", required_extensions[i]); + is_suitable = false; + } } bool has_graphics{}, has_present{}; const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { const auto& family = queue_family_properties[i]; - if (family.queueCount == 0) + if (family.queueCount == 0) { continue; - + } has_graphics |= (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; } if (!has_graphics || !has_present) { - // The device doesn't have a graphics and present queue. - return false; + LOG_INFO(Render_Vulkan, "Device lacks a graphics and present queue"); + is_suitable = false; } // TODO(Rodrigo): Check if the device matches all requeriments. const auto properties{physical.getProperties(dldi)}; - const auto limits{properties.limits}; - if (limits.maxUniformBufferRange < 65536) { - return false; + const auto& limits{properties.limits}; + + constexpr u32 required_ubo_size = 65536; + if (limits.maxUniformBufferRange < required_ubo_size) { + LOG_INFO(Render_Vulkan, "Device UBO size {} is too small, {} is required)", + limits.maxUniformBufferRange, required_ubo_size); + is_suitable = false; } - const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; - if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { - return false; + const auto features{physical.getFeatures(dldi)}; + const std::array feature_report = { + std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), + std::make_pair(features.independentBlend, "independentBlend"), + std::make_pair(features.depthClamp, "depthClamp"), + std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"), + std::make_pair(features.largePoints, "largePoints"), + }; + for (const auto& [supported, name] : feature_report) { + if (supported) { + continue; + } + LOG_INFO(Render_Vulkan, "Missing required feature: {}", name); + is_suitable = false; } - // Device is suitable. - return true; + return is_suitable; } std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { std::vector<const char*> extensions; - extensions.reserve(2); + extensions.reserve(7); extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); const auto Test = [&](const vk::ExtensionProperties& extension, std::optional<std::reference_wrapper<bool>> status, const char* name, - u32 revision) { - if (extension.extensionName != std::string(name)) { + bool push) { + if (extension.extensionName != std::string_view(name)) { return; } - extensions.push_back(name); + if (push) { + extensions.push_back(name); + } if (status) { status->get() = true; } }; + bool khr_shader_float16_int8{}; for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); + Test(extension, khr_uniform_buffer_standard_layout, + VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); + Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); + Test(extension, khr_driver_properties, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, true); + Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); + } + + if (khr_shader_float16_int8) { + is_float16_supported = + GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; + extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } return extensions; @@ -250,9 +359,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK } void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { - const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); + const auto props = physical.getProperties(dldi); device_type = props.deviceType; uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment); + storage_buffer_alignment = static_cast<u64>(props.limits.minStorageBufferOffsetAlignment); max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange); } @@ -273,42 +383,53 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con return queue_cis; } -std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( +std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { - static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eB5G6R5UnormPack16, - vk::Format::eA2B10G10R10UnormPack32, - vk::Format::eR32G32B32A32Sfloat, - vk::Format::eR16G16Unorm, - vk::Format::eR16G16Snorm, - vk::Format::eR8G8B8A8Srgb, - vk::Format::eR8Unorm, - vk::Format::eB10G11R11UfloatPack32, - vk::Format::eR32Sfloat, - vk::Format::eR16Sfloat, - vk::Format::eR16G16B16A16Sfloat, - vk::Format::eD32Sfloat, - vk::Format::eD16Unorm, - vk::Format::eD16UnormS8Uint, - vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, - vk::Format::eBc1RgbaUnormBlock, - vk::Format::eBc2UnormBlock, - vk::Format::eBc3UnormBlock, - vk::Format::eBc4UnormBlock, - vk::Format::eBc5UnormBlock, - vk::Format::eBc5SnormBlock, - vk::Format::eBc7UnormBlock, - vk::Format::eAstc4x4UnormBlock, - vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, - vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock}; - std::map<vk::Format, vk::FormatProperties> format_properties; + constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eA8B8G8R8SnormPack32, + vk::Format::eA8B8G8R8SrgbPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + vk::Format::eR16G16B16A16Uint, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR16G16Sfloat, + vk::Format::eR16Unorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8G8Unorm, + vk::Format::eR8G8Snorm, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eB8G8R8A8Unorm, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eBc1RgbaSrgbBlock, + vk::Format::eBc3SrgbBlock, + vk::Format::eBc7SrgbBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; + std::unordered_map<vk::Format, vk::FormatProperties> format_properties; for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi)); } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 537825d8b..010d4c3d6 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -4,7 +4,7 @@ #pragma once -#include <map> +#include <unordered_map> #include <vector> #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" @@ -69,16 +69,26 @@ public: return present_family; } - /// Returns if the device is integrated with the host CPU. + /// Returns true if the device is integrated with the host CPU. bool IsIntegrated() const { return device_type == vk::PhysicalDeviceType::eIntegratedGpu; } + /// Returns the driver ID. + vk::DriverIdKHR GetDriverID() const { + return driver_id; + } + /// Returns uniform buffer alignment requeriment. u64 GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + /// Returns storage alignment requeriment. + u64 GetStorageBufferAlignment() const { + return storage_buffer_alignment; + } + /// Returns the maximum range for storage buffers. u64 GetMaxStorageBufferRange() const { return max_storage_buffer_range; @@ -89,9 +99,19 @@ public: return is_optimal_astc_supported; } + /// Returns true if the device supports float16 natively + bool IsFloat16Supported() const { + return is_float16_supported; + } + /// Returns true if the device supports VK_EXT_scalar_block_layout. - bool IsExtScalarBlockLayoutSupported() const { - return ext_scalar_block_layout; + bool IsKhrUniformBufferStandardLayoutSupported() const { + return khr_uniform_buffer_standard_layout; + } + + /// Returns true if the device supports VK_EXT_index_type_uint8. + bool IsExtIndexTypeUint8Supported() const { + return ext_index_type_uint8; } /// Checks if the physical device is suitable. @@ -123,22 +143,28 @@ private: FormatType format_type) const; /// Returns the device properties for Vulkan formats. - static std::map<vk::Format, vk::FormatProperties> GetFormatProperties( + static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device. - vk::DispatchLoaderDynamic dld; ///< Device function pointers. - UniqueDevice logical; ///< Logical device. - vk::Queue graphics_queue; ///< Main graphics queue. - vk::Queue present_queue; ///< Main present queue. - u32 graphics_family{}; ///< Main graphics queue family index. - u32 present_family{}; ///< Main present queue family index. - vk::PhysicalDeviceType device_type; ///< Physical device type. - u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. - u64 max_storage_buffer_range{}; ///< Max storage buffer size. - bool is_optimal_astc_supported{}; ///< Support for native ASTC. - bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. - std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary. + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::PhysicalDeviceType device_type; ///< Physical device type. + vk::DriverIdKHR driver_id{}; ///< Driver ID. + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. + u64 storage_buffer_alignment{}; ///< Storage buffer alignment requeriment. + u64 max_storage_buffer_range{}; ///< Max storage buffer size. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool is_float16_supported{}; ///< Support for float16 arithmetics. + bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs. + bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8. + bool khr_driver_properties{}; ///< Support for VK_KHR_driver_properties. + std::unordered_map<vk::Format, vk::FormatProperties> + format_properties; ///< Format properties dictionary. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index a35b45c9c..b9153934e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -370,8 +370,8 @@ private: u32 binding = const_buffers_base_binding; for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - const Id type = - device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; + const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo + : t_cbuf_std140_ubo; const Id id = OpVariable(type, spv::StorageClass::Uniform); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); @@ -565,7 +565,7 @@ private: const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); Id pointer{}; - if (device.IsExtScalarBlockLayoutSupported()) { + if (device.IsKhrUniformBufferStandardLayoutSupported()) { const Id buffer_offset = Emit(OpShiftRightLogical( t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u))); pointer = Emit( @@ -944,6 +944,41 @@ private: return {}; } + Id AtomicImageAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageMin(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageMax(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageAnd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageOr(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageXor(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + + Id AtomicImageExchange(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -1366,6 +1401,13 @@ private: &SPIRVDecompiler::TexelFetch, &SPIRVDecompiler::ImageStore, + &SPIRVDecompiler::AtomicImageAdd, + &SPIRVDecompiler::AtomicImageMin, + &SPIRVDecompiler::AtomicImageMax, + &SPIRVDecompiler::AtomicImageAnd, + &SPIRVDecompiler::AtomicImageOr, + &SPIRVDecompiler::AtomicImageXor, + &SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 008109a99..d54fb88c9 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -44,7 +44,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SUST: { UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer); UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store @@ -66,8 +65,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { image.MarkWrite(); MetaImage meta{image, values}; - const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))}; - bb.push_back(store); + bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); + break; + } + case OpCode::Id::SUATOM: { + UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); + + Node value = GetRegister(instr.gpr0); + + std::vector<Node> coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + + const OperationCode operation_code = [instr] { + switch (instr.suatom_d.operation) { + case Tegra::Shader::ImageAtomicOperation::Add: + return OperationCode::AtomicImageAdd; + case Tegra::Shader::ImageAtomicOperation::Min: + return OperationCode::AtomicImageMin; + case Tegra::Shader::ImageAtomicOperation::Max: + return OperationCode::AtomicImageMax; + case Tegra::Shader::ImageAtomicOperation::And: + return OperationCode::AtomicImageAnd; + case Tegra::Shader::ImageAtomicOperation::Or: + return OperationCode::AtomicImageOr; + case Tegra::Shader::ImageAtomicOperation::Xor: + return OperationCode::AtomicImageXor; + case Tegra::Shader::ImageAtomicOperation::Exch: + return OperationCode::AtomicImageExchange; + default: + UNIMPLEMENTED_MSG("Unimplemented operation={}", + static_cast<u32>(instr.suatom_d.operation.Value())); + return OperationCode::AtomicImageAdd; + } + }(); + + const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; + MetaImage meta{image, {std::move(value)}}; + SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); break; } default: @@ -77,38 +114,51 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return pc; } -Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { - const auto offset{static_cast<u64>(image.index.Value())}; - - // If this image has already been used, return the existing mapping. - const auto it = used_images.find(offset); - if (it != used_images.end()) { - ASSERT(it->second.GetType() == type); - return it->second; +Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + const auto offset{static_cast<std::size_t>(image.index.Value())}; + if (const auto image = TryUseExistingImage(offset, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - return used_images.emplace(offset, Image{offset, next_index, type}).first->second; + return used_images.emplace(offset, Image{offset, next_index, type, size}).first->second; } -Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { +Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { const Node image_register{GetRegister(reg)}; const auto [base_image, cbuf_index, cbuf_offset]{ TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))}; const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)}; - // If this image has already been used, return the existing mapping. - const auto it = used_images.find(cbuf_key); - if (it != used_images.end()) { - ASSERT(it->second.GetType() == type); - return it->second; + if (const auto image = TryUseExistingImage(cbuf_key, type, size)) { + return *image; } - // Otherwise create a new mapping for this image. const std::size_t next_index{used_images.size()}; - return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type}) + return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type, size}) .first->second; } +Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) { + auto it = used_images.find(offset); + if (it == used_images.end()) { + return nullptr; + } + auto& image = it->second; + ASSERT(image.GetType() == type); + + if (size) { + // We know the size, if it's known it has to be the same as before, otherwise we can set it. + if (image.IsSizeKnown()) { + ASSERT(image.GetSize() == size); + } else { + image.SetSize(*size); + } + } + return ℑ +} + } // namespace VideoCommon::Shader diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index b29aedce8..b47b201cf 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -7,6 +7,7 @@ #include <array> #include <cstddef> #include <memory> +#include <optional> #include <string> #include <tuple> #include <utility> @@ -148,7 +149,14 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 - ImageStore, /// (MetaImage, float[N] coords) -> void + ImageStore, /// (MetaImage, int[N] values) -> void + AtomicImageAdd, /// (MetaImage, int[N] coords) -> void + AtomicImageMin, /// (MetaImage, int[N] coords) -> void + AtomicImageMax, /// (MetaImage, int[N] coords) -> void + AtomicImageAnd, /// (MetaImage, int[N] coords) -> void + AtomicImageOr, /// (MetaImage, int[N] coords) -> void + AtomicImageXor, /// (MetaImage, int[N] coords) -> void + AtomicImageExchange, /// (MetaImage, int[N] coords) -> void Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void @@ -275,25 +283,32 @@ private: class Image final { public: - constexpr explicit Image(u64 offset, std::size_t index, Tegra::Shader::ImageType type) - : offset{offset}, index{index}, type{type}, is_bindless{false} {} + constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) + : offset{offset}, index{index}, type{type}, is_bindless{false}, size{size} {} constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index, - Tegra::Shader::ImageType type) + Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size) : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type}, - is_bindless{true} {} + is_bindless{true}, size{size} {} constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type, - bool is_bindless, bool is_written, bool is_read) + bool is_bindless, bool is_written, bool is_read, + std::optional<Tegra::Shader::ImageAtomicSize> size) : offset{offset}, index{index}, type{type}, is_bindless{is_bindless}, - is_written{is_written}, is_read{is_read} {} + is_written{is_written}, is_read{is_read}, size{size} {} + + void MarkWrite() { + is_written = true; + } void MarkRead() { is_read = true; } - void MarkWrite() { - is_written = true; + void SetSize(Tegra::Shader::ImageAtomicSize size_) { + size = size_; } constexpr std::size_t GetOffset() const { @@ -312,25 +327,39 @@ public: return is_bindless; } - constexpr bool IsRead() const { - return is_read; - } - constexpr bool IsWritten() const { return is_written; } + constexpr bool IsRead() const { + return is_read; + } + constexpr std::pair<u32, u32> GetBindlessCBuf() const { return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)}; } + constexpr bool IsSizeKnown() const { + return size.has_value(); + } + + constexpr Tegra::Shader::ImageAtomicSize GetSize() const { + return size.value(); + } + + constexpr bool operator<(const Image& rhs) const { + return std::tie(offset, index, type, size, is_bindless) < + std::tie(rhs.offset, rhs.index, rhs.type, rhs.size, rhs.is_bindless); + } + private: u64 offset{}; std::size_t index{}; Tegra::Shader::ImageType type{}; bool is_bindless{}; - bool is_read{}; bool is_written{}; + bool is_read{}; + std::optional<Tegra::Shader::ImageAtomicSize> size{}; }; struct GlobalMemoryBase { diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0f891eace..62816bd56 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -272,10 +272,16 @@ private: bool is_shadow); /// Accesses an image. - Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); + Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size = {}); /// Access a bindless image sampler. - Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); + Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size = {}); + + /// Tries to access an existing image, updating it's state as needed + Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type, + std::optional<Tegra::Shader::ImageAtomicSize> size); /// Extracts a sequence of bits from a node Node BitfieldExtract(Node value, u32 offset, u32 bits); diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 4ceb219be..53d0142cb 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -513,6 +513,26 @@ bool IsPixelFormatASTC(PixelFormat format) { } } +bool IsPixelFormatSRGB(PixelFormat format) { + switch (format) { + case PixelFormat::RGBA8_SRGB: + case PixelFormat::BGRA8_SRGB: + case PixelFormat::DXT1_SRGB: + case PixelFormat::DXT23_SRGB: + case PixelFormat::DXT45_SRGB: + case PixelFormat::BC7U_SRGB: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8_SRGB: + return true; + default: + return false; + } +} + std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) { return {GetDefaultBlockWidth(format), GetDefaultBlockHeight(format)}; } diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 83f31c12c..19268b7cd 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -547,6 +547,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format); bool IsPixelFormatASTC(PixelFormat format); +bool IsPixelFormatSRGB(PixelFormat format); + std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); /// Returns true if the specified PixelFormat is a BCn format, e.g. DXT or DXN |