diff options
48 files changed, 1128 insertions, 906 deletions
diff --git a/externals/microprofile/microprofile.h b/externals/microprofile/microprofile.h index c1556d10c..9d830f7bf 100644 --- a/externals/microprofile/microprofile.h +++ b/externals/microprofile/microprofile.h @@ -828,7 +828,7 @@ inline MicroProfileLogEntry MicroProfileMakeLogIndex(uint64_t nBegin, MicroProfi MicroProfileLogEntry Entry = (nBegin<<62) | ((0x3fff&nToken)<<48) | (MP_LOG_TICK_MASK&nTick); int t = MicroProfileLogType(Entry); uint64_t nTimerIndex = MicroProfileLogTimerIndex(Entry); - MP_ASSERT(t == nBegin); + MP_ASSERT((uint64_t)t == nBegin); MP_ASSERT(nTimerIndex == (nToken&0x3fff)); return Entry; @@ -1556,10 +1556,10 @@ void MicroProfileFlip() pFramePut->nFrameStartCpu = MP_TICK(); pFramePut->nFrameStartGpu = (uint32_t)MicroProfileGpuInsertTimeStamp(); - if(pFrameNext->nFrameStartGpu != (uint64_t)-1) + if(pFrameNext->nFrameStartGpu != -1) pFrameNext->nFrameStartGpu = MicroProfileGpuGetTimeStamp((uint32_t)pFrameNext->nFrameStartGpu); - if(pFrameCurrent->nFrameStartGpu == (uint64_t)-1) + if(pFrameCurrent->nFrameStartGpu == -1) pFrameCurrent->nFrameStartGpu = pFrameNext->nFrameStartGpu + 1; uint64_t nFrameStartCpu = pFrameCurrent->nFrameStartCpu; diff --git a/src/core/core.cpp b/src/core/core.cpp index 218508126..d1bc9340d 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -166,7 +166,7 @@ struct System::Impl { service_manager = std::make_shared<Service::SM::ServiceManager>(); Service::Init(service_manager, system); - GDBStub::Init(); + GDBStub::DeferStart(); renderer = VideoCore::CreateRenderer(emu_window, system); if (!renderer->Init()) { diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index e8d8871a7..6d15aeed9 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -141,6 +141,7 @@ constexpr char target_xml[] = )"; int gdbserver_socket = -1; +bool defer_start = false; u8 command_buffer[GDB_BUFFER_SIZE]; u32 command_length; @@ -1166,6 +1167,9 @@ static void RemoveBreakpoint() { void HandlePacket() { if (!IsConnected()) { + if (defer_start) { + ToggleServer(true); + } return; } @@ -1256,6 +1260,10 @@ void ToggleServer(bool status) { } } +void DeferStart() { + defer_start = true; +} + static void Init(u16 port) { if (!server_enabled) { // Set the halt loop to false in case the user enabled the gdbstub mid-execution. @@ -1341,6 +1349,7 @@ void Shutdown() { if (!server_enabled) { return; } + defer_start = false; LOG_INFO(Debug_GDBStub, "Stopping GDB ..."); if (gdbserver_socket != -1) { diff --git a/src/core/gdbstub/gdbstub.h b/src/core/gdbstub/gdbstub.h index 5a36524b2..8fe3c320b 100644 --- a/src/core/gdbstub/gdbstub.h +++ b/src/core/gdbstub/gdbstub.h @@ -43,6 +43,13 @@ void ToggleServer(bool status); /// Start the gdbstub server. void Init(); +/** + * Defer initialization of the gdbstub to the first packet processing functions. + * This avoids a case where the gdbstub thread is frozen after initialization + * and fails to respond in time to packets. + */ +void DeferStart(); + /// Stop gdbstub server. void Shutdown(); diff --git a/src/core/hle/service/am/applets/web_browser.cpp b/src/core/hle/service/am/applets/web_browser.cpp index 12443c910..9f30e167d 100644 --- a/src/core/hle/service/am/applets/web_browser.cpp +++ b/src/core/hle/service/am/applets/web_browser.cpp @@ -254,6 +254,12 @@ void WebBrowser::Execute() { if (status != RESULT_SUCCESS) { complete = true; + + // This is a workaround in order not to softlock yuzu when an error happens during the + // webapplet init. In order to avoid an svcBreak, the status is set to RESULT_SUCCESS + Finalize(); + status = RESULT_SUCCESS; + return; } diff --git a/src/core/hle/service/set/set.cpp b/src/core/hle/service/set/set.cpp index 5bcc0b588..9e12c76fc 100644 --- a/src/core/hle/service/set/set.cpp +++ b/src/core/hle/service/set/set.cpp @@ -111,6 +111,14 @@ void SET::GetLanguageCode(Kernel::HLERequestContext& ctx) { rb.PushEnum(available_language_codes[Settings::values.language_index]); } +void SET::GetRegionCode(Kernel::HLERequestContext& ctx) { + LOG_DEBUG(Service_SET, "called"); + + IPC::ResponseBuilder rb{ctx, 3}; + rb.Push(RESULT_SUCCESS); + rb.Push(Settings::values.region_index); +} + SET::SET() : ServiceFramework("set") { // clang-format off static const FunctionInfo functions[] = { @@ -118,7 +126,7 @@ SET::SET() : ServiceFramework("set") { {1, &SET::GetAvailableLanguageCodes, "GetAvailableLanguageCodes"}, {2, &SET::MakeLanguageCode, "MakeLanguageCode"}, {3, &SET::GetAvailableLanguageCodeCount, "GetAvailableLanguageCodeCount"}, - {4, nullptr, "GetRegionCode"}, + {4, &SET::GetRegionCode, "GetRegionCode"}, {5, &SET::GetAvailableLanguageCodes2, "GetAvailableLanguageCodes2"}, {6, &SET::GetAvailableLanguageCodeCount2, "GetAvailableLanguageCodeCount2"}, {7, nullptr, "GetKeyCodeMap"}, diff --git a/src/core/hle/service/set/set.h b/src/core/hle/service/set/set.h index b154e08aa..6084b345d 100644 --- a/src/core/hle/service/set/set.h +++ b/src/core/hle/service/set/set.h @@ -43,6 +43,7 @@ private: void GetAvailableLanguageCodeCount(Kernel::HLERequestContext& ctx); void GetAvailableLanguageCodeCount2(Kernel::HLERequestContext& ctx); void GetQuestFlag(Kernel::HLERequestContext& ctx); + void GetRegionCode(Kernel::HLERequestContext& ctx); }; } // namespace Service::Set diff --git a/src/core/hle/service/sm/controller.cpp b/src/core/hle/service/sm/controller.cpp index c45b285f8..9cca84b31 100644 --- a/src/core/hle/service/sm/controller.cpp +++ b/src/core/hle/service/sm/controller.cpp @@ -44,7 +44,7 @@ void Controller::QueryPointerBufferSize(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3}; rb.Push(RESULT_SUCCESS); - rb.Push<u16>(0x500); + rb.Push<u16>(0x1000); } Controller::Controller() : ServiceFramework("IpcController") { diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp index 57b1a2bca..78d4acd95 100644 --- a/src/core/hle/service/time/time_zone_content_manager.cpp +++ b/src/core/hle/service/time/time_zone_content_manager.cpp @@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) { return {}; } - std::vector<char> raw_data(binary_list->GetSize()); + std::vector<char> raw_data(binary_list->GetSize() + 1); binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize()); std::stringstream data_stream{raw_data.data()}; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 7c0303684..c1282cb80 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -86,6 +86,7 @@ void LogSettings() { LogSetting("System_RngSeed", Settings::values.rng_seed.value_or(0)); LogSetting("System_CurrentUser", Settings::values.current_user); LogSetting("System_LanguageIndex", Settings::values.language_index); + LogSetting("System_RegionIndex", Settings::values.region_index); LogSetting("Core_UseMultiCore", Settings::values.use_multi_core); LogSetting("Renderer_UseResolutionFactor", Settings::values.resolution_factor); LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); diff --git a/src/core/settings.h b/src/core/settings.h index cb5979e6f..79ec01731 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -387,6 +387,8 @@ struct Values { s32 current_user; s32 language_index; + s32 region_index; + s32 sound_index; // Controls std::array<PlayerInput, 10> players; diff --git a/src/input_common/udp/client.cpp b/src/input_common/udp/client.cpp index e82ae7ef1..da5227058 100644 --- a/src/input_common/udp/client.cpp +++ b/src/input_common/udp/client.cpp @@ -35,7 +35,7 @@ public: pad_index(pad_index) { boost::system::error_code ec{}; auto ipv4 = boost::asio::ip::make_address_v4(host, ec); - if (ec.failed()) { + if (ec.value() != boost::system::errc::success) { LOG_ERROR(Input, "Invalid IPv4 address \"{}\" provided to socket", host); ipv4 = boost::asio::ip::address_v4{}; } diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index ca99cc22f..8c6ef1394 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <mutex> +#include <optional> #include <tuple> #include "common/param_package.h" @@ -44,7 +45,7 @@ public: std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { { std::lock_guard guard(status->update_mutex); - status->touch_calibration.emplace(); + status->touch_calibration = DeviceStatus::CalibrationData{}; // These default values work well for DS4 but probably not other touch inputs status->touch_calibration->min_x = params.Get("min_x", 100); status->touch_calibration->min_y = params.Get("min_y", 50); diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 4429f3405..e16075993 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -15,14 +15,6 @@ namespace VideoCommon::Dirty { using Tegra::Engines::Maxwell3D; -void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store) { - store[RenderTargets] = true; - store[ZetaBuffer] = true; - for (std::size_t i = 0; i < Maxwell3D::Regs::NumRenderTargets; ++i) { - store[ColorBuffer0 + i] = true; - } -} - void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { static constexpr std::size_t num_per_rt = NUM(rt[0]); static constexpr std::size_t begin = OFF(rt); diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 0dbafd3ef..3f6c1d83a 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -44,8 +44,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_ FillBlock(tables[1], begin, num, index_b); } -void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store); - void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); } // namespace VideoCommon::Dirty diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 1ecd65925..368c75a66 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - const auto r_type{tic_entry.r_type.Value()}; - const auto g_type{tic_entry.g_type.Value()}; - const auto b_type{tic_entry.b_type.Value()}; - const auto a_type{tic_entry.a_type.Value()}; - - // TODO(Subv): Different data types for separate components are not supported - DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); - return tic_entry; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index ce536e29b..ba63b44b4 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -98,6 +98,8 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.framebuffer_srgb = 1; regs.front_face = Maxwell3D::Regs::FrontFace::ClockWise; + shadow_state = regs; + mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; mme_inline[MAXWELL3D_REG_INDEX(vertex_buffer.count)] = true; @@ -160,8 +162,17 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - if (regs.reg_array[method] != method_call.argument) { - regs.reg_array[method] = method_call.argument; + u32 arg = method_call.argument; + // Keep track of the register value in shadow_state when requested. + if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Track || + shadow_state.shadow_ram_control == Regs::ShadowRamControl::TrackWithFilter) { + shadow_state.reg_array[method] = arg; + } else if (shadow_state.shadow_ram_control == Regs::ShadowRamControl::Replay) { + arg = shadow_state.reg_array[method]; + } + + if (regs.reg_array[method] != arg) { + regs.reg_array[method] = arg; for (const auto& table : dirty.tables) { dirty.flags[table[method]] = true; @@ -169,12 +180,16 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { } switch (method) { + case MAXWELL3D_REG_INDEX(shadow_ram_control): { + shadow_state.shadow_ram_control = static_cast<Regs::ShadowRamControl>(method_call.argument); + break; + } case MAXWELL3D_REG_INDEX(macros.data): { - ProcessMacroUpload(method_call.argument); + ProcessMacroUpload(arg); break; } case MAXWELL3D_REG_INDEX(macros.bind): { - ProcessMacroBind(method_call.argument); + ProcessMacroBind(arg); break; } case MAXWELL3D_REG_INDEX(firmware[4]): { @@ -250,7 +265,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { } case MAXWELL3D_REG_INDEX(data_upload): { const bool is_last_call = method_call.IsLastCall(); - upload_state.ProcessData(method_call.argument, is_last_call); + upload_state.ProcessData(arg, is_last_call); if (is_last_call) { OnMemoryWrite(); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 8a9e9992e..d24c9f657 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -531,6 +531,17 @@ public: Fill = 0x1b02, }; + enum class ShadowRamControl : u32 { + // write value to shadow ram + Track = 0, + // write value to shadow ram ( with validation ??? ) + TrackWithFilter = 1, + // only write to real hw register + Passthrough = 2, + // write value from shadow ram to real hw register + Replay = 3, + }; + struct RenderTargetConfig { u32 address_high; u32 address_low; @@ -674,7 +685,9 @@ public: u32 bind; } macros; - INSERT_UNION_PADDING_WORDS(0x17); + ShadowRamControl shadow_ram_control; + + INSERT_UNION_PADDING_WORDS(0x16); Upload::Registers upload; struct { @@ -1263,7 +1276,12 @@ public: }; std::array<u32, NUM_REGS> reg_array; }; - } regs{}; + }; + + Regs regs{}; + + /// Store temporary hw register values, used by some calls to restore state after a operation + Regs shadow_state; static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); @@ -1458,6 +1476,7 @@ private: "Field " #field_name " has invalid position") ASSERT_REG_POSITION(macros, 0x45); +ASSERT_REG_POSITION(shadow_ram_control, 0x49); ASSERT_REG_POSITION(upload, 0x60); ASSERT_REG_POSITION(exec_upload, 0x6C); ASSERT_REG_POSITION(data_upload, 0x6D); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 4cd0c07a7..49dc5abe0 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -917,14 +917,9 @@ union Instruction { } fadd32i; union { - BitField<20, 8, u64> shift_position; - BitField<28, 8, u64> shift_length; - BitField<48, 1, u64> negate_b; - BitField<49, 1, u64> negate_a; - - u64 GetLeftShiftValue() const { - return 32 - (shift_position + shift_length); - } + BitField<40, 1, u64> brev; + BitField<47, 1, u64> rd_cc; + BitField<48, 1, u64> is_signed; } bfe; union { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ba8c9d665..64acb17df 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -39,6 +39,7 @@ enum class RenderTargetFormat : u32 { RGBA32_FLOAT = 0xC0, RGBA32_UINT = 0xC2, RGBA16_UNORM = 0xC6, + RGBA16_SNORM = 0xC7, RGBA16_UINT = 0xC9, RGBA16_FLOAT = 0xCA, RG32_FLOAT = 0xCB, diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp index f2c83266e..6d522c318 100644 --- a/src/video_core/morton.cpp +++ b/src/video_core/morton.cpp @@ -51,6 +51,7 @@ static constexpr ConversionArray morton_to_linear_fns = { MortonCopy<true, PixelFormat::R8UI>, MortonCopy<true, PixelFormat::RGBA16F>, MortonCopy<true, PixelFormat::RGBA16U>, + MortonCopy<true, PixelFormat::RGBA16S>, MortonCopy<true, PixelFormat::RGBA16UI>, MortonCopy<true, PixelFormat::R11FG11FB10F>, MortonCopy<true, PixelFormat::RGBA32UI>, @@ -131,6 +132,7 @@ static constexpr ConversionArray linear_to_morton_fns = { MortonCopy<false, PixelFormat::R8U>, MortonCopy<false, PixelFormat::R8UI>, MortonCopy<false, PixelFormat::RGBA16F>, + MortonCopy<false, PixelFormat::RGBA16S>, MortonCopy<false, PixelFormat::RGBA16U>, MortonCopy<false, PixelFormat::RGBA16UI>, MortonCopy<false, PixelFormat::R11FG11FB10F>, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1af4268a4..826eee7df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -93,10 +93,6 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } -void oglEnablei(GLenum cap, bool state, GLuint index) { - (state ? glEnablei : glDisablei)(cap, index); -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, @@ -478,7 +474,6 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; query_cache.UpdateCounters(); @@ -529,7 +524,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Upload vertex and index data. SetupVertexBuffer(); SetupVertexInstances(); - GLintptr index_buffer_offset; + GLintptr index_buffer_offset = 0; if (is_indexed) { index_buffer_offset = SetupIndexBuffer(); } @@ -555,7 +550,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ConfigureFramebuffers(); // Signal the buffer cache that we are not going to upload more things. - const bool invalidate = buffer_cache.Unmap(); + buffer_cache.Unmap(); // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. vertex_array_pushbuffer.Bind(); @@ -938,13 +933,15 @@ void RasterizerOpenGL::SyncViewport() { } flags[Dirty::Viewport0 + i] = false; - const Common::Rectangle<f32> rect{regs.viewport_transform[i].GetRect()}; + const auto& src = regs.viewport_transform[i]; + const Common::Rectangle<f32> rect{src.GetRect()}; glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), rect.GetHeight()); - const auto& src = regs.viewports[i]; - glDepthRangeIndexed(static_cast<GLuint>(i), static_cast<GLdouble>(src.depth_range_near), - static_cast<GLdouble>(src.depth_range_far)); + const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; + const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; + const GLdouble far_depth = src.translate_z + src.scale_z; + glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); } } } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e6ae8041b..8aa4a7ac9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -402,10 +402,6 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } -[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { - return stage == ShaderType::Vertex; -} - struct GenericVaryingDescription { std::string name; u8 first_element = 0; @@ -538,8 +534,9 @@ private: } void DeclareVertex() { - if (!IsVertexShader(stage)) + if (stage != ShaderType::Vertex) { return; + } DeclareVertexRedeclarations(); } @@ -621,14 +618,14 @@ private: break; } } - if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { + if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { if (ir.UsesLayer()) { code.AddLine("int gl_Layer;"); } if (ir.UsesViewportIndex()) { code.AddLine("int gl_ViewportIndex;"); } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && + } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { LOG_ERROR( Render_OpenGL, @@ -1170,7 +1167,7 @@ private: // TODO(Subv): Find out what the values are for the first two elements when inside a // vertex shader, and what's the value of the fourth element when inside a Tess Eval // shader. - ASSERT(IsVertexShader(stage)); + ASSERT(stage == ShaderType::Vertex); switch (element) { case 2: // Config pack's first value is instance_id. @@ -1248,12 +1245,12 @@ private: UNIMPLEMENTED(); return {}; case 1: - if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { + if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { return {}; } return {{"gl_Layer", Type::Int}}; case 2: - if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { + if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { return {}; } return {{"gl_ViewportIndex", Type::Int}}; @@ -2052,16 +2049,19 @@ private: expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors.at(operation.GetOperandsCount() - 1); + expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - const std::size_t next = i + 1; - if (next == count) - expr += ')'; - else if (next < count) + if (i > 0) { expr += ", "; + } + expr += VisitOperand(operation, i).AsInt(); } + if (meta->array) { + expr += ", "; + expr += Visit(meta->array).AsInt(); + } + expr += ')'; if (meta->lod && !meta->sampler.IsBuffer()) { expr += ", "; @@ -2572,7 +2572,7 @@ private: } u32 GetNumPhysicalInputAttributes() const { - return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); + return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); } u32 GetNumPhysicalAttributes() const { diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 3f3bdf812..255ac3147 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -238,7 +238,6 @@ void StateTracker::Initialize() { SetupDirtyMisc(tables); auto& store = dirty.on_write_stores; - SetupCommonOnWriteStores(store); store[VertexBuffers] = true; for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { store[VertexBuffer0 + i] = true; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2d3838a7a..f424e3000 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 12333e8c9..fca5e3ec0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,8 +5,11 @@ #include <algorithm> #include <cstddef> #include <cstdlib> +#include <cstring> #include <memory> + #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -25,6 +28,8 @@ namespace OpenGL { +namespace { + // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have // to wait on available presentation frames. constexpr std::size_t SWAP_CHAIN_SIZE = 3; @@ -41,124 +46,6 @@ struct Frame { bool is_srgb{}; /// Framebuffer is sRGB or RGB }; -/** - * For smooth Vsync rendering, we want to always present the latest frame that the core generates, - * but also make sure that rendering happens at the pace that the frontend dictates. This is a - * helper class that the renderer uses to sync frames between the render thread and the presentation - * thread - */ -class FrameMailbox { -public: - std::mutex swap_chain_lock; - std::condition_variable present_cv; - std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; - std::queue<Frame*> free_queue; - std::deque<Frame*> present_queue; - Frame* previous_frame{}; - - FrameMailbox() { - for (auto& frame : swap_chain) { - free_queue.push(&frame); - } - } - - ~FrameMailbox() { - // lock the mutex and clear out the present and free_queues and notify any people who are - // blocked to prevent deadlock on shutdown - std::scoped_lock lock{swap_chain_lock}; - std::queue<Frame*>().swap(free_queue); - present_queue.clear(); - present_cv.notify_all(); - } - - void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { - frame->present.Release(); - frame->present.Create(); - GLint previous_draw_fbo{}; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); - glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); - } - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); - frame->color_reloaded = false; - } - - void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { - // Recreate the color texture attachment - frame->color.Release(); - frame->color.Create(); - const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; - glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); - - // Recreate the FBO for the render target - frame->render.Release(); - frame->render.Create(); - glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); - } - - frame->width = width; - frame->height = height; - frame->color_reloaded = true; - } - - Frame* GetRenderFrame() { - std::unique_lock lock{swap_chain_lock}; - - // If theres no free frames, we will reuse the oldest render frame - if (free_queue.empty()) { - auto frame = present_queue.back(); - present_queue.pop_back(); - return frame; - } - - Frame* frame = free_queue.front(); - free_queue.pop(); - return frame; - } - - void ReleaseRenderFrame(Frame* frame) { - std::unique_lock lock{swap_chain_lock}; - present_queue.push_front(frame); - present_cv.notify_one(); - } - - Frame* TryGetPresentFrame(int timeout_ms) { - std::unique_lock lock{swap_chain_lock}; - // wait for new entries in the present_queue - present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), - [&] { return !present_queue.empty(); }); - if (present_queue.empty()) { - // timed out waiting for a frame to draw so return the previous frame - return previous_frame; - } - - // free the previous frame and add it back to the free queue - if (previous_frame) { - free_queue.push(previous_frame); - } - - // the newest entries are pushed to the front of the queue - Frame* frame = present_queue.front(); - present_queue.pop_front(); - // remove all old entries from the present queue and move them back to the free_queue - for (auto f : present_queue) { - free_queue.push(f); - } - present_queue.clear(); - previous_frame = frame; - return frame; - } -}; - -namespace { - constexpr char VERTEX_SHADER[] = R"( #version 430 core @@ -211,6 +98,24 @@ struct ScreenRectVertex { std::array<GLfloat, 2> tex_coord; }; +/// Returns true if any debug tool is attached +bool HasDebugTool() { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + if (nsight) { + return true; + } + + GLint num_extensions; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); + for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { + const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); + if (!std::strcmp(name, "GL_EXT_debug_tool")) { + return true; + } + } + return false; +} + /** * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left * corner and (width, height) on the lower-bottom. @@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit } // Anonymous namespace +/** + * For smooth Vsync rendering, we want to always present the latest frame that the core generates, + * but also make sure that rendering happens at the pace that the frontend dictates. This is a + * helper class that the renderer uses to sync frames between the render thread and the presentation + * thread + */ +class FrameMailbox { +public: + std::mutex swap_chain_lock; + std::condition_variable present_cv; + std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; + std::queue<Frame*> free_queue; + std::deque<Frame*> present_queue; + Frame* previous_frame{}; + + FrameMailbox() : has_debug_tool{HasDebugTool()} { + for (auto& frame : swap_chain) { + free_queue.push(&frame); + } + } + + ~FrameMailbox() { + // lock the mutex and clear out the present and free_queues and notify any people who are + // blocked to prevent deadlock on shutdown + std::scoped_lock lock{swap_chain_lock}; + std::queue<Frame*>().swap(free_queue); + present_queue.clear(); + present_cv.notify_all(); + } + + void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { + frame->present.Release(); + frame->present.Create(); + GLint previous_draw_fbo{}; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); + frame->color_reloaded = false; + } + + void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { + // Recreate the color texture attachment + frame->color.Release(); + frame->color.Create(); + const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; + glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); + + // Recreate the FBO for the render target + frame->render.Release(); + frame->render.Create(); + glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); + } + + frame->width = width; + frame->height = height; + frame->color_reloaded = true; + } + + Frame* GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will reuse the oldest render frame + if (free_queue.empty()) { + auto frame = present_queue.back(); + present_queue.pop_back(); + return frame; + } + + Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; + } + + void ReleaseRenderFrame(Frame* frame) { + std::unique_lock lock{swap_chain_lock}; + present_queue.push_front(frame); + present_cv.notify_one(); + + DebugNotifyNextFrame(); + } + + Frame* TryGetPresentFrame(int timeout_ms) { + DebugWaitForNextFrame(); + + std::unique_lock lock{swap_chain_lock}; + // wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // timed out waiting for a frame to draw so return the previous frame + return previous_frame; + } + + // free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + } + + // the newest entries are pushed to the front of the queue + Frame* frame = present_queue.front(); + present_queue.pop_front(); + // remove all old entries from the present queue and move them back to the free_queue + for (auto f : present_queue) { + free_queue.push(f); + } + present_queue.clear(); + previous_frame = frame; + return frame; + } + +private: + std::mutex debug_synch_mutex; + std::condition_variable debug_synch_condition; + std::atomic_int frame_for_debug{}; + const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step + + /// Signal that a new frame is available (called from GPU thread) + void DebugNotifyNextFrame() { + if (!has_debug_tool) { + return; + } + frame_for_debug++; + std::lock_guard lock{debug_synch_mutex}; + debug_synch_condition.notify_one(); + } + + /// Wait for a new frame to be available (called from presentation thread) + void DebugWaitForNextFrame() { + if (!has_debug_tool) { + return; + } + const int last_frame = frame_for_debug; + std::unique_lock lock{debug_synch_mutex}; + debug_synch_condition.wait(lock, + [this, last_frame] { return frame_for_debug > last_frame; }); + } +}; + RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, frame_mailbox{std::make_unique<FrameMailbox>()} {} diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index df3ac707c..f93447610 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -125,6 +125,7 @@ struct FormatTuple { {vk::Format::eR8Uint, Attachable | Storage}, // R8UI {vk::Format::eR16G16B16A16Sfloat, Attachable | Storage}, // RGBA16F {vk::Format::eR16G16B16A16Unorm, Attachable | Storage}, // RGBA16U + {vk::Format::eR16G16B16A16Snorm, Attachable | Storage}, // RGBA16S {vk::Format::eR16G16B16A16Uint, Attachable | Storage}, // RGBA16UI {vk::Format::eB10G11R11UfloatPack32, Attachable | Storage}, // R11FG11FB10F {vk::Format::eR32G32B32A32Uint, Attachable | Storage}, // RGBA32UI @@ -256,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { return vk::ShaderStageFlagBits::eGeometry; case Tegra::Engines::ShaderType::Fragment: return vk::ShaderStageFlagBits::eFragment; + case Tegra::Engines::ShaderType::Compute: + return vk::ShaderStageFlagBits::eCompute; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); return {}; @@ -331,6 +334,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return vk::Format::eR16G16B16Unorm; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: return vk::Format::eR16G16B16A16Unorm; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return vk::Format::eA2B10G10R10UnormPack32; default: break; } @@ -364,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return vk::Format::eR8G8B8A8Uint; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32: + return vk::Format::eR32G32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return vk::Format::eR32G32B32Uint; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return vk::Format::eR32G32B32A32Uint; default: diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 3847bd722..28d2fbc4f 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -535,6 +535,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eR32G32Sfloat, vk::Format::eR32G32Uint, vk::Format::eR16G16B16A16Uint, + vk::Format::eR16G16B16A16Snorm, vk::Format::eR16G16B16A16Unorm, vk::Format::eR16G16Unorm, vk::Format::eR16G16Snorm, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 056ef495c..557b9d662 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) + VKUpdateDescriptorQueue& update_descriptor_queue, + VKRenderPassCache& renderpass_cache) : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, - renderpass_cache(device) {} + renderpass_cache{renderpass_cache} {} VKPipelineCache::~VKPipelineCache() = default; @@ -191,7 +192,6 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { std::array<Shader, Maxwell::MaxShaderProgram> shaders; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = gpu.regs.shader_config[index]; const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 21340c9a4..c4c112290 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -161,7 +161,8 @@ public: explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + VKRenderPassCache& renderpass_cache); ~VKPipelineCache(); std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); @@ -184,8 +185,7 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - - VKRenderPassCache renderpass_cache; + VKRenderPassCache& renderpass_cache; std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f889019c1..58c69b786 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind screen_info{screen_info}, device{device}, resource_manager{resource_manager}, memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, staging_pool(device, memory_manager, scheduler), descriptor_pool(device), - update_descriptor_queue(device, scheduler), + update_descriptor_queue(device, scheduler), renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, staging_pool), - pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), + pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, + renderpass_cache), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), sampler_cache(device), query_cache(system, *this, device, scheduler) { scheduler.SetQueryCache(query_cache); @@ -365,13 +366,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); - query_cache.UpdateCounters(); - const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; } + sampled_views.clear(); + image_views.clear(); + + query_cache.UpdateCounters(); + const auto& regs = gpu.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; @@ -380,52 +384,54 @@ void RasterizerVulkan::Clear() { if (!use_color && !use_depth && !use_stencil) { return; } - // Clearing images requires to be out of a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. + [[maybe_unused]] const auto texceptions = UpdateAttachments(); + DEBUG_ASSERT(texceptions.none()); + SetupImageTransitions(0, color_attachments, zeta_attachment); - if (use_color) { - View color_view; - { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); - } + const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); + const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); + scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + + const auto& scissor = regs.scissor_test[0]; + const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); + vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; + scissor_extent.width = std::min(scissor_extent.width, render_area.width); + scissor_extent.height = std::min(scissor_extent.height, render_area.height); - color_view->Transition(vk::ImageLayout::eTransferDstOptimal, - vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite); + const u32 layer = regs.clear_buffers.layer; + const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); + if (use_color) { const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], regs.clear_color[3]}; - const vk::ClearColorValue clear(clear_color); - scheduler.Record([image = color_view->GetImage(), - subresource = color_view->GetImageSubresourceRange(), - clear](auto cmdbuf, auto& dld) { - cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, - dld); + const vk::ClearValue clear_value{clear_color}; + const u32 color_attachment = regs.clear_buffers.RT; + scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { + const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, + clear_value); + cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); }); } - if (use_depth || use_stencil) { - View zeta_surface; - { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - zeta_surface = texture_cache.GetDepthBufferSurface(false); - } - zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, - vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite); - - const vk::ClearDepthStencilValue clear(regs.clear_depth, - static_cast<u32>(regs.clear_stencil)); - scheduler.Record([image = zeta_surface->GetImage(), - subresource = zeta_surface->GetImageSubresourceRange(), - clear](auto cmdbuf, auto& dld) { - cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, - subresource, dld); - }); + if (!use_depth && !use_stencil) { + return; + } + vk::ImageAspectFlags aspect_flags; + if (use_depth) { + aspect_flags |= vk::ImageAspectFlagBits::eDepth; + } + if (use_stencil) { + aspect_flags |= vk::ImageAspectFlagBits::eStencil; } + + scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, + clear_rect, aspect_flags](auto cmdbuf, auto& dld) { + const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); + const vk::ClearValue clear_value{clear_zeta}; + const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); + cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); + }); } void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { @@ -542,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, // Verify that the cached surface is the same size and format as the requested framebuffer const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); @@ -1151,7 +1155,7 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { // This implementation assumes that all attributes are used in the shader. const GPUVAddr start{regs.vertex_array[index].StartAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; - DEBUG_ASSERT(end > start); + DEBUG_ASSERT(end >= start); size += (end - start + 1) * regs.vertex_array[index].enable; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b2e73d98d..3185868e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -253,6 +253,7 @@ private: VKStagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; + VKRenderPassCache renderpass_cache; QuadArrayPass quad_array_pass; Uint8Pass uint8_pass; diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b2c298051..51ecb5567 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -837,7 +837,7 @@ private: Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset)); } - element += static_cast<u8>(num_components); + element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); } } diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index d9ea3cc21..374959f82 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -100,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) { } u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { - static constexpr u64 epochs_to_destroy = 180; static constexpr std::size_t deletions_per_tick = 16; auto& staging = cache[log2]; @@ -108,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo const std::size_t old_size = entries.size(); const auto is_deleteable = [this](const auto& entry) { + static constexpr u64 epochs_to_destroy = 180; return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); }; const std::size_t begin_offset = staging.delete_index; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index d74e68b63..94a89e388 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -90,8 +90,6 @@ void StateTracker::Initialize() { SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); - - SetupCommonOnWriteStores(dirty.on_write_stores); } void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 73d92a5ae..26175921b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -52,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { return vk::ImageType::e2D; case SurfaceTarget::Texture3D: return vk::ImageType::e3D; + case SurfaceTarget::TextureBuffer: + UNREACHABLE(); + return {}; } UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); return {}; @@ -273,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { for (u32 level = 0; level < params.num_levels; ++level) { vk::BufferImageCopy copy = GetBufferImageCopy(level); - const auto& dld = device.GetDispatchLoader(); if (image->GetAspectMask() == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { vk::BufferImageCopy depth = copy; @@ -422,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); - const auto& dld{device.GetDispatchLoader()}; const vk::ImageSubresourceLayers src_subresource( src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); const vk::ImageSubresourceLayers dst_subresource( @@ -458,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - const auto& dld{device.GetDispatchLoader()}; scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, is_linear](auto cmdbuf, auto& dld) { cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp index e02bcd097..8e3b46e8e 100644 --- a/src/video_core/shader/decode/bfe.cpp +++ b/src/video_core/shader/decode/bfe.cpp @@ -17,33 +17,60 @@ u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.bfe.negate_b); - Node op_a = GetRegister(instr.gpr8); - op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); - - switch (opcode->get().GetId()) { - case OpCode::Id::BFE_IMM: { - UNIMPLEMENTED_IF_MSG(instr.generates_cc, - "Condition codes generation in BFE is not implemented"); + Node op_b = [&] { + switch (opcode->get().GetId()) { + case OpCode::Id::BFE_R: + return GetRegister(instr.gpr20); + case OpCode::Id::BFE_C: + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); + case OpCode::Id::BFE_IMM: + return Immediate(instr.alu.GetSignedImm20_20()); + default: + UNREACHABLE(); + return Immediate(0); + } + }(); - const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); - const Node outer_shift_imm = - Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); + UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); - const Node inner_shift = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); - const Node outer_shift = - Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); + const bool is_signed = instr.bfe.is_signed; - SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); - SetRegister(bb, instr.gpr0, outer_shift); - break; - } - default: - UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); + // using reverse parallel method in + // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel + // note for later if possible to implement faster method. + if (instr.bfe.brev) { + const auto swap = [&](u32 s, u32 mask) { + Node v1 = + SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); + if (mask != 0) { + v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), + Immediate(mask)); + } + Node v2 = op_a; + if (mask != 0) { + v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), + Immediate(mask)); + } + v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), + Immediate(s)); + return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), + std::move(v2)); + }; + op_a = swap(1, 0x55555555U); + op_a = swap(2, 0x33333333U); + op_a = swap(4, 0x0F0F0F0FU); + op_a = swap(8, 0x00FF00FFU); + op_a = swap(16, 0); } + const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, + Immediate(0), Immediate(8)); + const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, + Immediate(8), Immediate(8)); + auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); + SetRegister(bb, instr.gpr0, std::move(result)); + return pc; } diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 206961909..fbd7e9a17 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -12,6 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -63,15 +64,18 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { } }(); - op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); + op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), + instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); const Node original_b = op_b; - op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); + op_b = SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b), + is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - // TODO(Rodrigo): Use an appropiate sign for this operation - Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); + // we already check sign_a and sign_b is difference or not before so just use one in here. + Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); if (is_psl) { - product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); + product = + SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); } SetTemporary(bb, 0, product); product = GetTemporary(0); @@ -88,12 +92,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return BitfieldExtract(original_c, 16, 16); case Tegra::Shader::XmadMode::CBcc: { const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - NO_PRECISE, original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c, - shifted_b); + original_b, Immediate(16)); + return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); + } + case Tegra::Shader::XmadMode::CSfu: { + const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a, + op_a, Immediate(0)); + const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b, + op_b, Immediate(0)); + const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); + + const Node comp_minus_a = GetPredicateComparisonInteger( + PredCondition::NotEqual, is_signed_a, + SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, + Immediate(0x80000000)), + Immediate(0)); + const Node comp_minus_b = GetPredicateComparisonInteger( + PredCondition::NotEqual, is_signed_b, + SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, + Immediate(0x80000000)), + Immediate(0)); + + Node new_c = Operation( + OperationCode::Select, comp_minus_a, + SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), + original_c); + new_c = Operation( + OperationCode::Select, comp_minus_b, + SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), + std::move(new_c)); + + return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); } default: - UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value())); + UNREACHABLE(); return Immediate(0); } }(); @@ -102,18 +134,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { op_c = GetTemporary(1); // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = Operation(OperationCode::IAdd, product, op_c); + Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); SetTemporary(bb, 2, sum); sum = GetTemporary(2); if (is_merge) { - const Node a = BitfieldExtract(sum, 0, 16); - const Node b = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); - sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); + const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), + Immediate(0), Immediate(16)); + const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, + Immediate(16)); + sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); } SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, sum); + SetRegister(bb, instr.gpr0, std::move(sum)); return pc; } diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index b3dcd291c..76c56abb5 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -68,6 +68,8 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) return OperationCode::UBitwiseXor; case OperationCode::IBitwiseNot: return OperationCode::UBitwiseNot; + case OperationCode::IBitfieldExtract: + return OperationCode::UBitfieldExtract; case OperationCode::IBitfieldInsert: return OperationCode::UBitfieldInsert; case OperationCode::IBitCount: diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 9707c353d..cc7181229 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -111,6 +111,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) return PixelFormat::RGBA16F; case Tegra::RenderTargetFormat::RGBA16_UNORM: return PixelFormat::RGBA16U; + case Tegra::RenderTargetFormat::RGBA16_SNORM: + return PixelFormat::RGBA16S; case Tegra::RenderTargetFormat::RGBA16_UINT: return PixelFormat::RGBA16UI; case Tegra::RenderTargetFormat::RGBA32_FLOAT: diff --git a/src/video_core/surface.h b/src/video_core/surface.h index d88109e5a..ae8817465 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -25,82 +25,83 @@ enum class PixelFormat { R8UI = 7, RGBA16F = 8, RGBA16U = 9, - RGBA16UI = 10, - R11FG11FB10F = 11, - RGBA32UI = 12, - DXT1 = 13, - DXT23 = 14, - DXT45 = 15, - DXN1 = 16, // This is also known as BC4 - DXN2UNORM = 17, - DXN2SNORM = 18, - BC7U = 19, - BC6H_UF16 = 20, - BC6H_SF16 = 21, - ASTC_2D_4X4 = 22, - BGRA8 = 23, - RGBA32F = 24, - RG32F = 25, - R32F = 26, - R16F = 27, - R16U = 28, - R16S = 29, - R16UI = 30, - R16I = 31, - RG16 = 32, - RG16F = 33, - RG16UI = 34, - RG16I = 35, - RG16S = 36, - RGB32F = 37, - RGBA8_SRGB = 38, - RG8U = 39, - RG8S = 40, - RG32UI = 41, - RGBX16F = 42, - R32UI = 43, - R32I = 44, - ASTC_2D_8X8 = 45, - ASTC_2D_8X5 = 46, - ASTC_2D_5X4 = 47, - BGRA8_SRGB = 48, - DXT1_SRGB = 49, - DXT23_SRGB = 50, - DXT45_SRGB = 51, - BC7U_SRGB = 52, - R4G4B4A4U = 53, - ASTC_2D_4X4_SRGB = 54, - ASTC_2D_8X8_SRGB = 55, - ASTC_2D_8X5_SRGB = 56, - ASTC_2D_5X4_SRGB = 57, - ASTC_2D_5X5 = 58, - ASTC_2D_5X5_SRGB = 59, - ASTC_2D_10X8 = 60, - ASTC_2D_10X8_SRGB = 61, - ASTC_2D_6X6 = 62, - ASTC_2D_6X6_SRGB = 63, - ASTC_2D_10X10 = 64, - ASTC_2D_10X10_SRGB = 65, - ASTC_2D_12X12 = 66, - ASTC_2D_12X12_SRGB = 67, - ASTC_2D_8X6 = 68, - ASTC_2D_8X6_SRGB = 69, - ASTC_2D_6X5 = 70, - ASTC_2D_6X5_SRGB = 71, - E5B9G9R9F = 72, + RGBA16S = 10, + RGBA16UI = 11, + R11FG11FB10F = 12, + RGBA32UI = 13, + DXT1 = 14, + DXT23 = 15, + DXT45 = 16, + DXN1 = 17, // This is also known as BC4 + DXN2UNORM = 18, + DXN2SNORM = 19, + BC7U = 20, + BC6H_UF16 = 21, + BC6H_SF16 = 22, + ASTC_2D_4X4 = 23, + BGRA8 = 24, + RGBA32F = 25, + RG32F = 26, + R32F = 27, + R16F = 28, + R16U = 29, + R16S = 30, + R16UI = 31, + R16I = 32, + RG16 = 33, + RG16F = 34, + RG16UI = 35, + RG16I = 36, + RG16S = 37, + RGB32F = 38, + RGBA8_SRGB = 39, + RG8U = 40, + RG8S = 41, + RG32UI = 42, + RGBX16F = 43, + R32UI = 44, + R32I = 45, + ASTC_2D_8X8 = 46, + ASTC_2D_8X5 = 47, + ASTC_2D_5X4 = 48, + BGRA8_SRGB = 49, + DXT1_SRGB = 50, + DXT23_SRGB = 51, + DXT45_SRGB = 52, + BC7U_SRGB = 53, + R4G4B4A4U = 54, + ASTC_2D_4X4_SRGB = 55, + ASTC_2D_8X8_SRGB = 56, + ASTC_2D_8X5_SRGB = 57, + ASTC_2D_5X4_SRGB = 58, + ASTC_2D_5X5 = 59, + ASTC_2D_5X5_SRGB = 60, + ASTC_2D_10X8 = 61, + ASTC_2D_10X8_SRGB = 62, + ASTC_2D_6X6 = 63, + ASTC_2D_6X6_SRGB = 64, + ASTC_2D_10X10 = 65, + ASTC_2D_10X10_SRGB = 66, + ASTC_2D_12X12 = 67, + ASTC_2D_12X12_SRGB = 68, + ASTC_2D_8X6 = 69, + ASTC_2D_8X6_SRGB = 70, + ASTC_2D_6X5 = 71, + ASTC_2D_6X5_SRGB = 72, + E5B9G9R9F = 73, MaxColorFormat, // Depth formats - Z32F = 73, - Z16 = 74, + Z32F = 74, + Z16 = 75, MaxDepthFormat, // DepthStencil formats - Z24S8 = 75, - S8Z24 = 76, - Z32FS8 = 77, + Z24S8 = 76, + S8Z24 = 77, + Z32FS8 = 78, MaxDepthStencilFormat, @@ -138,6 +139,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{ 0, // R8UI 0, // RGBA16F 0, // RGBA16U + 0, // RGBA16S 0, // RGBA16UI 0, // R11FG11FB10F 0, // RGBA32UI @@ -235,6 +237,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{ 1, // R8UI 1, // RGBA16F 1, // RGBA16U + 1, // RGBA16S 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI @@ -324,6 +327,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{ 1, // R8UI 1, // RGBA16F 1, // RGBA16U + 1, // RGBA16S 1, // RGBA16UI 1, // R11FG11FB10F 1, // RGBA32UI @@ -413,6 +417,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{ 8, // R8UI 64, // RGBA16F 64, // RGBA16U + 64, // RGBA16S 64, // RGBA16UI 32, // R11FG11FB10F 128, // RGBA32UI @@ -517,6 +522,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table SurfaceCompression::None, // R8UI SurfaceCompression::None, // RGBA16F SurfaceCompression::None, // RGBA16U + SurfaceCompression::None, // RGBA16S SurfaceCompression::None, // RGBA16UI SurfaceCompression::None, // R11FG11FB10F SurfaceCompression::None, // RGBA32UI diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index cc3ad8417..e151c26c4 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -41,7 +41,7 @@ struct Table { ComponentType alpha_component; bool is_srgb; }; -constexpr std::array<Table, 75> DefinitionTable = {{ +constexpr std::array<Table, 76> DefinitionTable = {{ {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, @@ -61,6 +61,7 @@ constexpr std::array<Table, 75> DefinitionTable = {{ {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U}, {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S}, + {TextureFormat::R16_G16_B16_A16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RGBA16S}, {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U}, {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F}, {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI}, diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 33bd31865..062b4f252 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -17,26 +17,37 @@ #include <algorithm> #include <cassert> -#include <cstdint> #include <cstring> #include <vector> +#include "common/common_types.h" + #include "video_core/textures/astc.h" +namespace { + +/// Count the number of bits set in a number. +constexpr u32 Popcnt(u32 n) { + u32 c = 0; + for (; n; c++) { + n &= n - 1; + } + return c; +} + +} // Anonymous namespace + class InputBitStream { public: - explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) + explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) : m_CurByte(ptr), m_NextBit(start_offset % 8) {} - ~InputBitStream() = default; - - int GetBitsRead() const { + std::size_t GetBitsRead() const { return m_BitsRead; } - int ReadBit() { - - int bit = *m_CurByte >> m_NextBit++; + u32 ReadBit() { + u32 bit = *m_CurByte >> m_NextBit++; while (m_NextBit >= 8) { m_NextBit -= 8; m_CurByte++; @@ -46,57 +57,66 @@ public: return bit & 1; } - unsigned int ReadBits(unsigned int nBits) { - unsigned int ret = 0; - for (unsigned int i = 0; i < nBits; i++) { + u32 ReadBits(std::size_t nBits) { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { + ret |= (ReadBit() & 1) << i; + } + return ret; + } + + template <std::size_t nBits> + u32 ReadBits() { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { ret |= (ReadBit() & 1) << i; } return ret; } private: - const unsigned char* m_CurByte; - int m_NextBit = 0; - int m_BitsRead = 0; + const u8* m_CurByte; + std::size_t m_NextBit = 0; + std::size_t m_BitsRead = 0; }; class OutputBitStream { public: - explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) + explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} ~OutputBitStream() = default; - int GetBitsWritten() const { + s32 GetBitsWritten() const { return m_BitsWritten; } - void WriteBitsR(unsigned int val, unsigned int nBits) { - for (unsigned int i = 0; i < nBits; i++) { + void WriteBitsR(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { WriteBit((val >> (nBits - i - 1)) & 1); } } - void WriteBits(unsigned int val, unsigned int nBits) { - for (unsigned int i = 0; i < nBits; i++) { + void WriteBits(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { WriteBit((val >> i) & 1); } } private: - void WriteBit(int b) { + void WriteBit(s32 b) { if (done) return; - const unsigned int mask = 1 << m_NextBit++; + const u32 mask = 1 << m_NextBit++; // clear the bit - *m_CurByte &= static_cast<unsigned char>(~mask); + *m_CurByte &= static_cast<u8>(~mask); // Write the bit, if necessary if (b) - *m_CurByte |= static_cast<unsigned char>(mask); + *m_CurByte |= static_cast<u8>(mask); // Next byte? if (m_NextBit >= 8) { @@ -107,10 +127,10 @@ private: done = done || ++m_BitsWritten >= m_NumBits; } - int m_BitsWritten = 0; - const int m_NumBits; - unsigned char* m_CurByte; - int m_NextBit = 0; + s32 m_BitsWritten = 0; + const s32 m_NumBits; + u8* m_CurByte; + s32 m_NextBit = 0; bool done = false; }; @@ -123,20 +143,20 @@ public: Bits(const Bits&) = delete; Bits& operator=(const Bits&) = delete; - uint8_t operator[](uint32_t bitPos) const { - return static_cast<uint8_t>((m_Bits >> bitPos) & 1); + u8 operator[](u32 bitPos) const { + return static_cast<u8>((m_Bits >> bitPos) & 1); } - IntType operator()(uint32_t start, uint32_t end) const { + IntType operator()(u32 start, u32 end) const { if (start == end) { return (*this)[start]; } else if (start > end) { - uint32_t t = start; + u32 t = start; start = end; end = t; } - uint64_t mask = (1 << (end - start + 1)) - 1; + u64 mask = (1 << (end - start + 1)) - 1; return (m_Bits >> start) & static_cast<IntType>(mask); } @@ -144,273 +164,236 @@ private: const IntType& m_Bits; }; -enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; - -class IntegerEncodedValue { -private: - const EIntegerEncoding m_Encoding; - const uint32_t m_NumBits; - uint32_t m_BitValue; - union { - uint32_t m_QuintValue; - uint32_t m_TritValue; - }; +enum class IntegerEncoding { JustBits, Qus32, Trit }; -public: - // Jank, but we're not doing any heavy lifting in this class, so it's - // probably OK. It allows us to use these in std::vectors... - IntegerEncodedValue& operator=(const IntegerEncodedValue& other) { - new (this) IntegerEncodedValue(other); - return *this; - } +struct IntegerEncodedValue { + constexpr IntegerEncodedValue() = default; - IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) - : m_Encoding(encoding), m_NumBits(numBits) {} + constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) + : encoding{encoding_}, num_bits{num_bits_} {} - EIntegerEncoding GetEncoding() const { - return m_Encoding; - } - uint32_t BaseBitLength() const { - return m_NumBits; - } - - uint32_t GetBitValue() const { - return m_BitValue; - } - void SetBitValue(uint32_t val) { - m_BitValue = val; - } - - uint32_t GetTritValue() const { - return m_TritValue; - } - void SetTritValue(uint32_t val) { - m_TritValue = val; - } - - uint32_t GetQuintValue() const { - return m_QuintValue; - } - void SetQuintValue(uint32_t val) { - m_QuintValue = val; - } - - bool MatchesEncoding(const IntegerEncodedValue& other) const { - return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits; + constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { + return encoding == other.encoding && num_bits == other.num_bits; } // Returns the number of bits required to encode nVals values. - uint32_t GetBitLength(uint32_t nVals) const { - uint32_t totalBits = m_NumBits * nVals; - if (m_Encoding == eIntegerEncoding_Trit) { + u32 GetBitLength(u32 nVals) const { + u32 totalBits = num_bits * nVals; + if (encoding == IntegerEncoding::Trit) { totalBits += (nVals * 8 + 4) / 5; - } else if (m_Encoding == eIntegerEncoding_Quint) { + } else if (encoding == IntegerEncoding::Qus32) { totalBits += (nVals * 7 + 2) / 3; } return totalBits; } - // Count the number of bits set in a number. - static inline uint32_t Popcnt(uint32_t n) { - uint32_t c; - for (c = 0; n; c++) { - n &= n - 1; + IntegerEncoding encoding{}; + u32 num_bits = 0; + u32 bit_value = 0; + union { + u32 qus32_value = 0; + u32 trit_value; + }; +}; + +static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, + u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + u32 m[5]; + u32 t[5]; + u32 T; + + // Read the trit encoded block according to + // table C.2.14 + m[0] = bits.ReadBits(nBitsPerValue); + T = bits.ReadBits<2>(); + m[1] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 2; + m[2] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 4; + m[3] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 5; + m[4] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 7; + + u32 C = 0; + + Bits<u32> Tb(T); + if (Tb(2, 4) == 7) { + C = (Tb(5, 7) << 2) | Tb(0, 1); + t[4] = t[3] = 2; + } else { + C = Tb(0, 4); + if (Tb(5, 6) == 3) { + t[4] = 2; + t[3] = Tb[7]; + } else { + t[4] = Tb[7]; + t[3] = Tb(5, 6); } - return c; } - // Returns a new instance of this struct that corresponds to the - // can take no more than maxval values - static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { - while (maxVal > 0) { - uint32_t check = maxVal + 1; - - // Is maxVal a power of two? - if (!(check & (check - 1))) { - return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); - } - - // Is maxVal of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { - return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); - } + Bits<u32> Cb(C); + if (Cb(0, 1) == 3) { + t[2] = 2; + t[1] = Cb[4]; + t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); + } else if (Cb(2, 3) == 3) { + t[2] = 2; + t[1] = 2; + t[0] = Cb(0, 1); + } else { + t[2] = Cb[4]; + t[1] = Cb(2, 3); + t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); + } - // Is maxVal of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { - return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); - } + for (std::size_t i = 0; i < 5; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); + val.bit_value = m[i]; + val.trit_value = t[i]; + } +} - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - maxVal--; +static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, + u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + u32 m[3]; + u32 q[3]; + u32 Q; + + // Read the trit encoded block according to + // table C.2.15 + m[0] = bits.ReadBits(nBitsPerValue); + Q = bits.ReadBits<3>(); + m[1] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 3; + m[2] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 5; + + Bits<u32> Qb(Q); + if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { + q[0] = q[1] = 4; + q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); + } else { + u32 C = 0; + if (Qb(1, 2) == 3) { + q[2] = 4; + C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; + } else { + q[2] = Qb(5, 6); + C = Qb(0, 4); } - return IntegerEncodedValue(eIntegerEncoding_JustBits, 0); - } - - // Fills result with the values that are encoded in the given - // bitstream. We must know beforehand what the maximum possible - // value is, and how many values we're decoding. - static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, - InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { - // Determine encoding parameters - IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); - - // Start decoding - uint32_t nValsDecoded = 0; - while (nValsDecoded < nValues) { - switch (val.GetEncoding()) { - case eIntegerEncoding_Quint: - DecodeQuintBlock(bits, result, val.BaseBitLength()); - nValsDecoded += 3; - break; - case eIntegerEncoding_Trit: - DecodeTritBlock(bits, result, val.BaseBitLength()); - nValsDecoded += 5; - break; - - case eIntegerEncoding_JustBits: - val.SetBitValue(bits.ReadBits(val.BaseBitLength())); - result.push_back(val); - nValsDecoded++; - break; - } + Bits<u32> Cb(C); + if (Cb(0, 2) == 5) { + q[1] = 4; + q[0] = Cb(3, 4); + } else { + q[1] = Cb(3, 4); + q[0] = Cb(0, 2); } } -private: - static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, - uint32_t nBitsPerValue) { - // Implement the algorithm in section C.2.12 - uint32_t m[5]; - uint32_t t[5]; - uint32_t T; - - // Read the trit encoded block according to - // table C.2.14 - m[0] = bits.ReadBits(nBitsPerValue); - T = bits.ReadBits(2); - m[1] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBits(2) << 2; - m[2] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBit() << 4; - m[3] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBits(2) << 5; - m[4] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBit() << 7; - - uint32_t C = 0; - - Bits<uint32_t> Tb(T); - if (Tb(2, 4) == 7) { - C = (Tb(5, 7) << 2) | Tb(0, 1); - t[4] = t[3] = 2; - } else { - C = Tb(0, 4); - if (Tb(5, 6) == 3) { - t[4] = 2; - t[3] = Tb[7]; - } else { - t[4] = Tb[7]; - t[3] = Tb(5, 6); - } + for (std::size_t i = 0; i < 3; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue); + val.bit_value = m[i]; + val.qus32_value = q[i]; + } +} + +// Returns a new instance of this struct that corresponds to the +// can take no more than maxval values +static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { + while (maxVal > 0) { + u32 check = maxVal + 1; + + // Is maxVal a power of two? + if (!(check & (check - 1))) { + return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); } - Bits<uint32_t> Cb(C); - if (Cb(0, 1) == 3) { - t[2] = 2; - t[1] = Cb[4]; - t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); - } else if (Cb(2, 3) == 3) { - t[2] = 2; - t[1] = 2; - t[0] = Cb(0, 1); - } else { - t[2] = Cb[4]; - t[1] = Cb(2, 3); - t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); + // Is maxVal of the type 3*2^n - 1? + if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); } - for (uint32_t i = 0; i < 5; i++) { - IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); - val.SetBitValue(m[i]); - val.SetTritValue(t[i]); - result.push_back(val); + // Is maxVal of the type 5*2^n - 1? + if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); } + + // Apparently it can't be represented with a bounded integer sequence... + // just iterate. + maxVal--; } + return IntegerEncodedValue(IntegerEncoding::JustBits, 0); +} - static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, - uint32_t nBitsPerValue) { - // Implement the algorithm in section C.2.12 - uint32_t m[3]; - uint32_t q[3]; - uint32_t Q; - - // Read the trit encoded block according to - // table C.2.15 - m[0] = bits.ReadBits(nBitsPerValue); - Q = bits.ReadBits(3); - m[1] = bits.ReadBits(nBitsPerValue); - Q |= bits.ReadBits(2) << 3; - m[2] = bits.ReadBits(nBitsPerValue); - Q |= bits.ReadBits(2) << 5; - - Bits<uint32_t> Qb(Q); - if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { - q[0] = q[1] = 4; - q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); - } else { - uint32_t C = 0; - if (Qb(1, 2) == 3) { - q[2] = 4; - C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; - } else { - q[2] = Qb(5, 6); - C = Qb(0, 4); - } +static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { + std::array<IntegerEncodedValue, 256> encodings{}; + for (std::size_t i = 0; i < encodings.size(); ++i) { + encodings[i] = CreateEncoding(static_cast<u32>(i)); + } + return encodings; +} - Bits<uint32_t> Cb(C); - if (Cb(0, 2) == 5) { - q[1] = 4; - q[0] = Cb(3, 4); - } else { - q[1] = Cb(3, 4); - q[0] = Cb(0, 2); - } - } +static constexpr std::array EncodingsValues = MakeEncodedValues(); + +// Fills result with the values that are encoded in the given +// bitstream. We must know beforehand what the maximum possible +// value is, and how many values we're decoding. +static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, + u32 maxRange, u32 nValues) { + // Determine encoding parameters + IntegerEncodedValue val = EncodingsValues[maxRange]; + + // Start decoding + u32 nValsDecoded = 0; + while (nValsDecoded < nValues) { + switch (val.encoding) { + case IntegerEncoding::Qus32: + DecodeQus32Block(bits, result, val.num_bits); + nValsDecoded += 3; + break; + + case IntegerEncoding::Trit: + DecodeTritBlock(bits, result, val.num_bits); + nValsDecoded += 5; + break; - for (uint32_t i = 0; i < 3; i++) { - IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); - val.m_BitValue = m[i]; - val.m_QuintValue = q[i]; + case IntegerEncoding::JustBits: + val.bit_value = bits.ReadBits(val.num_bits); result.push_back(val); + nValsDecoded++; + break; } } -}; +} namespace ASTCC { struct TexelWeightParams { - uint32_t m_Width = 0; - uint32_t m_Height = 0; + u32 m_Width = 0; + u32 m_Height = 0; bool m_bDualPlane = false; - uint32_t m_MaxWeight = 0; + u32 m_MaxWeight = 0; bool m_bError = false; bool m_bVoidExtentLDR = false; bool m_bVoidExtentHDR = false; - uint32_t GetPackedBitSize() const { + u32 GetPackedBitSize() const { // How many indices do we have? - uint32_t nIdxs = m_Height * m_Width; + u32 nIdxs = m_Height * m_Width; if (m_bDualPlane) { nIdxs *= 2; } - return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); + return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); } - uint32_t GetNumWeightValues() const { - uint32_t ret = m_Width * m_Height; + u32 GetNumWeightValues() const { + u32 ret = m_Width * m_Height; if (m_bDualPlane) { ret *= 2; } @@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { TexelWeightParams params; // Read the entire block mode all at once - uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); + u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); // Does this match the void extent block mode? if ((modeBits & 0x01FF) == 0x1FC) { @@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { // of the block mode. Layout is determined by a number // between 0 and 9 corresponding to table C.2.8 of the // ASTC spec. - uint32_t layout = 0; + u32 layout = 0; if ((modeBits & 0x1) || (modeBits & 0x2)) { // layout is in [0-4] @@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { assert(layout < 10); // Determine R - uint32_t R = !!(modeBits & 0x10); + u32 R = !!(modeBits & 0x10); if (layout < 5) { R |= (modeBits & 0x3) << 1; } else { @@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { // Determine width & height switch (layout) { case 0: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; params.m_Width = B + 4; params.m_Height = A + 2; break; } case 1: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; params.m_Width = B + 8; params.m_Height = A + 2; break; } case 2: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; params.m_Width = A + 2; params.m_Height = B + 8; break; } case 3: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x1; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; params.m_Width = A + 2; params.m_Height = B + 6; break; } case 4: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x1; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; params.m_Width = B + 2; params.m_Height = A + 2; break; } case 5: { - uint32_t A = (modeBits >> 5) & 0x3; + u32 A = (modeBits >> 5) & 0x3; params.m_Width = 12; params.m_Height = A + 2; break; } case 6: { - uint32_t A = (modeBits >> 5) & 0x3; + u32 A = (modeBits >> 5) & 0x3; params.m_Width = A + 2; params.m_Height = 12; break; @@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { } case 9: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 9) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 9) & 0x3; params.m_Width = A + 6; params.m_Height = B + 6; break; } default: - assert(!"Don't know this layout..."); + assert(false && "Don't know this layout..."); params.m_bError = true; break; } @@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { bool H = (layout != 9) && (modeBits & 0x200); if (H) { - const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; + const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; params.m_MaxWeight = maxWeights[R - 2]; } else { - const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; + const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; params.m_MaxWeight = maxWeights[R - 2]; } @@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, - uint32_t blockHeight) { +static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, + u32 blockHeight) { // Don't actually care about the void extent, just read the bits... - for (int i = 0; i < 4; ++i) { - strm.ReadBits(13); + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); } // Decode the RGBA components and renormalize them to the range [0, 255] - uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); - uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); - uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); - uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); + u16 r = static_cast<u16>(strm.ReadBits<16>()); + u16 g = static_cast<u16>(strm.ReadBits<16>()); + u16 b = static_cast<u16>(strm.ReadBits<16>()); + u16 a = static_cast<u16>(strm.ReadBits<16>()); - uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | - (static_cast<uint32_t>(a) & 0xFF00) << 16; + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | + (static_cast<u32>(a) & 0xFF00) << 16; - for (uint32_t j = 0; j < blockHeight; j++) { - for (uint32_t i = 0; i < blockWidth; i++) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = rgba; } } } -static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { - for (uint32_t j = 0; j < blockHeight; j++) { - for (uint32_t i = 0; i < blockWidth; i++) { +static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = 0xFFFF00FF; } } @@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] // is the same as [(numBits - 1):0] and repeats all the way down. template <typename IntType> -static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { +static IntType Replicate(IntType val, u32 numBits, u32 toBit) { if (numBits == 0) return 0; if (toBit == 0) return 0; IntType v = val & static_cast<IntType>((1 << numBits) - 1); IntType res = v; - uint32_t reslen = numBits; + u32 reslen = numBits; while (reslen < toBit) { - uint32_t comp = 0; + u32 comp = 0; if (numBits > toBit - reslen) { - uint32_t newshift = toBit - reslen; + u32 newshift = toBit - reslen; comp = numBits - newshift; numBits = newshift; } @@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { class Pixel { protected: - using ChannelType = int16_t; - uint8_t m_BitDepth[4] = {8, 8, 8, 8}; - int16_t color[4] = {}; + using ChannelType = s16; + u8 m_BitDepth[4] = {8, 8, 8, 8}; + s16 color[4] = {}; public: Pixel() = default; - Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) - : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, + Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) + : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} @@ -691,22 +674,22 @@ public: // significant bits when going from larger to smaller bit depth // or by repeating the most significant bits when going from // smaller to larger bit depths. - void ChangeBitDepth(const uint8_t (&depth)[4]) { - for (uint32_t i = 0; i < 4; i++) { + void ChangeBitDepth(const u8 (&depth)[4]) { + for (u32 i = 0; i < 4; i++) { Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); m_BitDepth[i] = depth[i]; } } template <typename IntType> - static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { + static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { float denominator = static_cast<float>((1 << bitDepth) - 1); return static_cast<float>(channel) / denominator; } // Changes the bit depth of a single component. See the comment // above for how we do this. - static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { + static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { assert(newDepth <= 8); assert(oldDepth <= 8); @@ -722,16 +705,15 @@ public: if (newDepth == 0) { return 0xFF; } else { - uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); - uint16_t v = static_cast<uint16_t>(val); - v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); - v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), - static_cast<uint16_t>((1 << newDepth) - 1)); - return static_cast<uint8_t>(v); + u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); + u16 v = static_cast<u16>(val); + v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); + return static_cast<u8>(v); } } - assert(!"We shouldn't get here."); + assert(false && "We shouldn't get here."); return 0; } @@ -759,15 +741,15 @@ public: ChannelType& B() { return color[3]; } - const ChannelType& Component(uint32_t idx) const { + const ChannelType& Component(u32 idx) const { return color[idx]; } - ChannelType& Component(uint32_t idx) { + ChannelType& Component(u32 idx) { return color[idx]; } - void GetBitDepth(uint8_t (&outDepth)[4]) const { - for (int i = 0; i < 4; i++) { + void GetBitDepth(u8 (&outDepth)[4]) const { + for (s32 i = 0; i < 4; i++) { outDepth[i] = m_BitDepth[i]; } } @@ -776,12 +758,12 @@ public: // and then pack each channel into an R8G8B8A8 32-bit integer. We assume // that the architecture is little-endian, so the alpha channel will end // up in the most-significant byte. - uint32_t Pack() const { + u32 Pack() const { Pixel eightBit(*this); - const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; + const u8 eightBitDepth[4] = {8, 8, 8, 8}; eightBit.ChangeBitDepth(eightBitDepth); - uint32_t r = 0; + u32 r = 0; r |= eightBit.A(); r <<= 8; r |= eightBit.B(); @@ -794,7 +776,7 @@ public: // Clamps the pixel to the range [0,255] void ClampByte() { - for (uint32_t i = 0; i < 4; i++) { + for (u32 i = 0; i < 4; i++) { color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); } } @@ -804,24 +786,24 @@ public: } }; -static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, - const uint32_t nPartitions, const uint32_t nBitsForColorData) { +static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions, + const u32 nBitsForColorData) { // First figure out how many color values we have - uint32_t nValues = 0; - for (uint32_t i = 0; i < nPartitions; i++) { + u32 nValues = 0; + for (u32 i = 0; i < nPartitions; i++) { nValues += ((modes[i] >> 2) + 1) << 1; } // Then based on the number of values and the remaining number of bits, // figure out the max value for each of them... - uint32_t range = 256; + u32 range = 256; while (--range > 0) { - IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); - uint32_t bitLength = val.GetBitLength(nValues); + IntegerEncodedValue val = EncodingsValues[range]; + u32 bitLength = val.GetBitLength(nValues); if (bitLength <= nBitsForColorData) { // Find the smallest possible range that matches the given encoding while (--range > 0) { - IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); + IntegerEncodedValue newval = EncodingsValues[range]; if (!newval.MatchesEncoding(val)) { break; } @@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode // We now have enough to decode our integer sequence. std::vector<IntegerEncodedValue> decodedColorValues; + decodedColorValues.reserve(32); + InputBitStream colorStream(data); - IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); + DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); // Once we have the decoded values, we need to dequantize them to the 0-255 range // This procedure is outlined in ASTC spec C.2.13 - uint32_t outIdx = 0; + u32 outIdx = 0; for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { // Have we already decoded all that we need? if (outIdx >= nValues) { @@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode } const IntegerEncodedValue& val = *itr; - uint32_t bitlen = val.BaseBitLength(); - uint32_t bitval = val.GetBitValue(); + u32 bitlen = val.num_bits; + u32 bitval = val.bit_value; assert(bitlen >= 1); - uint32_t A = 0, B = 0, C = 0, D = 0; + u32 A = 0, B = 0, C = 0, D = 0; // A is just the lsb replicated 9 times. A = Replicate(bitval & 1, 1, 9); - switch (val.GetEncoding()) { + switch (val.encoding) { // Replicate bits - case eIntegerEncoding_JustBits: + case IntegerEncoding::JustBits: out[outIdx++] = Replicate(bitval, bitlen, 8); break; // Use algorithm in C.2.13 - case eIntegerEncoding_Trit: { + case IntegerEncoding::Trit: { - D = val.GetTritValue(); + D = val.trit_value; switch (bitlen) { case 1: { @@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode case 2: { C = 93; // B = b000b0bb0 - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 8) | (b << 4) | (b << 2) | (b << 1); } break; case 3: { C = 44; // B = cb000cbcb - uint32_t cb = (bitval >> 1) & 3; + u32 cb = (bitval >> 1) & 3; B = (cb << 7) | (cb << 2) | cb; } break; case 4: { C = 22; // B = dcb000dcb - uint32_t dcb = (bitval >> 1) & 7; + u32 dcb = (bitval >> 1) & 7; B = (dcb << 6) | dcb; } break; case 5: { C = 11; // B = edcb000ed - uint32_t edcb = (bitval >> 1) & 0xF; + u32 edcb = (bitval >> 1) & 0xF; B = (edcb << 5) | (edcb >> 2); } break; case 6: { C = 5; // B = fedcb000f - uint32_t fedcb = (bitval >> 1) & 0x1F; + u32 fedcb = (bitval >> 1) & 0x1F; B = (fedcb << 4) | (fedcb >> 4); } break; default: - assert(!"Unsupported trit encoding for color values!"); + assert(false && "Unsupported trit encoding for color values!"); break; } // switch(bitlen) - } // case eIntegerEncoding_Trit + } // case IntegerEncoding::Trit break; - case eIntegerEncoding_Quint: { + case IntegerEncoding::Qus32: { - D = val.GetQuintValue(); + D = val.qus32_value; switch (bitlen) { case 1: { @@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode case 2: { C = 54; // B = b0000bb00 - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 8) | (b << 3) | (b << 2); } break; case 3: { C = 26; // B = cb0000cbc - uint32_t cb = (bitval >> 1) & 3; + u32 cb = (bitval >> 1) & 3; B = (cb << 7) | (cb << 1) | (cb >> 1); } break; case 4: { C = 13; // B = dcb0000dc - uint32_t dcb = (bitval >> 1) & 7; + u32 dcb = (bitval >> 1) & 7; B = (dcb << 6) | (dcb >> 1); } break; case 5: { C = 6; // B = edcb0000e - uint32_t edcb = (bitval >> 1) & 0xF; + u32 edcb = (bitval >> 1) & 0xF; B = (edcb << 5) | (edcb >> 3); } break; default: - assert(!"Unsupported quint encoding for color values!"); + assert(false && "Unsupported quint encoding for color values!"); break; } // switch(bitlen) - } // case eIntegerEncoding_Quint + } // case IntegerEncoding::Qus32 break; - } // switch(val.GetEncoding()) + } // switch(val.encoding) - if (val.GetEncoding() != eIntegerEncoding_JustBits) { - uint32_t T = D * C + B; + if (val.encoding != IntegerEncoding::JustBits) { + u32 T = D * C + B; T ^= A; T = (A & 0x80) | (T >> 2); out[outIdx++] = T; @@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode } // Make sure that each of our values is in the proper range... - for (uint32_t i = 0; i < nValues; i++) { + for (u32 i = 0; i < nValues; i++) { assert(out[i] <= 255); } } -static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { - uint32_t bitval = val.GetBitValue(); - uint32_t bitlen = val.BaseBitLength(); +static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { + u32 bitval = val.bit_value; + u32 bitlen = val.num_bits; - uint32_t A = Replicate(bitval & 1, 1, 7); - uint32_t B = 0, C = 0, D = 0; + u32 A = Replicate(bitval & 1, 1, 7); + u32 B = 0, C = 0, D = 0; - uint32_t result = 0; - switch (val.GetEncoding()) { - case eIntegerEncoding_JustBits: + u32 result = 0; + switch (val.encoding) { + case IntegerEncoding::JustBits: result = Replicate(bitval, bitlen, 6); break; - case eIntegerEncoding_Trit: { - D = val.GetTritValue(); + case IntegerEncoding::Trit: { + D = val.trit_value; assert(D < 3); switch (bitlen) { case 0: { - uint32_t results[3] = {0, 32, 63}; + u32 results[3] = {0, 32, 63}; result = results[D]; } break; @@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { case 2: { C = 23; - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 6) | (b << 2) | b; } break; case 3: { C = 11; - uint32_t cb = (bitval >> 1) & 3; + u32 cb = (bitval >> 1) & 3; B = (cb << 5) | cb; } break; default: - assert(!"Invalid trit encoding for texel weight"); + assert(false && "Invalid trit encoding for texel weight"); break; } } break; - case eIntegerEncoding_Quint: { - D = val.GetQuintValue(); + case IntegerEncoding::Qus32: { + D = val.qus32_value; assert(D < 5); switch (bitlen) { case 0: { - uint32_t results[5] = {0, 16, 32, 47, 63}; + u32 results[5] = {0, 16, 32, 47, 63}; result = results[D]; } break; @@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { case 2: { C = 13; - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 6) | (b << 1); } break; default: - assert(!"Invalid quint encoding for texel weight"); + assert(false && "Invalid quint encoding for texel weight"); break; } } break; } - if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { + if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { // Decode the value... result = D * C + B; result ^= A; @@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { return result; } -static void UnquantizeTexelWeights(uint32_t out[2][144], - const std::vector<IntegerEncodedValue>& weights, - const TexelWeightParams& params, const uint32_t blockWidth, - const uint32_t blockHeight) { - uint32_t weightIdx = 0; - uint32_t unquantized[2][144]; +static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, + const TexelWeightParams& params, const u32 blockWidth, + const u32 blockHeight) { + u32 weightIdx = 0; + u32 unquantized[2][144]; for (auto itr = weights.begin(); itr != weights.end(); ++itr) { unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); @@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], } // Do infill if necessary (Section C.2.18) ... - uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); - uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); + u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); + u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); - const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; - for (uint32_t plane = 0; plane < kPlaneScale; plane++) - for (uint32_t t = 0; t < blockHeight; t++) - for (uint32_t s = 0; s < blockWidth; s++) { - uint32_t cs = Ds * s; - uint32_t ct = Dt * t; + const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; + for (u32 plane = 0; plane < kPlaneScale; plane++) + for (u32 t = 0; t < blockHeight; t++) + for (u32 s = 0; s < blockWidth; s++) { + u32 cs = Ds * s; + u32 ct = Dt * t; - uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; - uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; + u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; + u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; - uint32_t js = gs >> 4; - uint32_t fs = gs & 0xF; + u32 js = gs >> 4; + u32 fs = gs & 0xF; - uint32_t jt = gt >> 4; - uint32_t ft = gt & 0x0F; + u32 jt = gt >> 4; + u32 ft = gt & 0x0F; - uint32_t w11 = (fs * ft + 8) >> 4; - uint32_t w10 = ft - w11; - uint32_t w01 = fs - w11; - uint32_t w00 = 16 - fs - ft + w11; + u32 w11 = (fs * ft + 8) >> 4; + u32 w10 = ft - w11; + u32 w01 = fs - w11; + u32 w00 = 16 - fs - ft + w11; - uint32_t v0 = js + jt * params.m_Width; + u32 v0 = js + jt * params.m_Width; #define FIND_TEXEL(tidx, bidx) \ - uint32_t p##bidx = 0; \ + u32 p##bidx = 0; \ do { \ if ((tidx) < (params.m_Width * params.m_Height)) { \ p##bidx = unquantized[plane][(tidx)]; \ @@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], } // Transfers a bit as described in C.2.14 -static inline void BitTransferSigned(int32_t& a, int32_t& b) { +static inline void BitTransferSigned(s32& a, s32& b) { b >>= 1; b |= a & 0x80; a >>= 1; @@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) { // Adds more precision to the blue channel as described // in C.2.14 -static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { - return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), - static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); +static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { + return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), + static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); } // Partition selection functions as specified in // C.2.21 -static inline uint32_t hash52(uint32_t p) { +static inline u32 hash52(u32 p) { p ^= p >> 15; p -= p << 17; p += p << 7; @@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) { return p; } -static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, - int32_t partitionCount, int32_t smallBlock) { +static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { if (1 == partitionCount) return 0; @@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, seed += (partitionCount - 1) * 1024; - uint32_t rnum = hash52(static_cast<uint32_t>(seed)); - uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); - uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); - uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); - uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); - uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); - uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); - uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); - uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); - uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); - uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); - uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); - uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); - - seed1 = static_cast<uint8_t>(seed1 * seed1); - seed2 = static_cast<uint8_t>(seed2 * seed2); - seed3 = static_cast<uint8_t>(seed3 * seed3); - seed4 = static_cast<uint8_t>(seed4 * seed4); - seed5 = static_cast<uint8_t>(seed5 * seed5); - seed6 = static_cast<uint8_t>(seed6 * seed6); - seed7 = static_cast<uint8_t>(seed7 * seed7); - seed8 = static_cast<uint8_t>(seed8 * seed8); - seed9 = static_cast<uint8_t>(seed9 * seed9); - seed10 = static_cast<uint8_t>(seed10 * seed10); - seed11 = static_cast<uint8_t>(seed11 * seed11); - seed12 = static_cast<uint8_t>(seed12 * seed12); - - int32_t sh1, sh2, sh3; + u32 rnum = hash52(static_cast<u32>(seed)); + u8 seed1 = static_cast<u8>(rnum & 0xF); + u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); + u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); + u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); + u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); + u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); + u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); + u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); + u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); + u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); + u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); + u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); + + seed1 = static_cast<u8>(seed1 * seed1); + seed2 = static_cast<u8>(seed2 * seed2); + seed3 = static_cast<u8>(seed3 * seed3); + seed4 = static_cast<u8>(seed4 * seed4); + seed5 = static_cast<u8>(seed5 * seed5); + seed6 = static_cast<u8>(seed6 * seed6); + seed7 = static_cast<u8>(seed7 * seed7); + seed8 = static_cast<u8>(seed8 * seed8); + seed9 = static_cast<u8>(seed9 * seed9); + seed10 = static_cast<u8>(seed10 * seed10); + seed11 = static_cast<u8>(seed11 * seed11); + seed12 = static_cast<u8>(seed12 * seed12); + + s32 sh1, sh2, sh3; if (seed & 1) { sh1 = (seed & 2) ? 4 : 5; sh2 = (partitionCount == 3) ? 6 : 5; @@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, } sh3 = (seed & 0x10) ? sh1 : sh2; - seed1 = static_cast<uint8_t>(seed1 >> sh1); - seed2 = static_cast<uint8_t>(seed2 >> sh2); - seed3 = static_cast<uint8_t>(seed3 >> sh1); - seed4 = static_cast<uint8_t>(seed4 >> sh2); - seed5 = static_cast<uint8_t>(seed5 >> sh1); - seed6 = static_cast<uint8_t>(seed6 >> sh2); - seed7 = static_cast<uint8_t>(seed7 >> sh1); - seed8 = static_cast<uint8_t>(seed8 >> sh2); - seed9 = static_cast<uint8_t>(seed9 >> sh3); - seed10 = static_cast<uint8_t>(seed10 >> sh3); - seed11 = static_cast<uint8_t>(seed11 >> sh3); - seed12 = static_cast<uint8_t>(seed12 >> sh3); - - int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); - int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); - int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); - int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + seed1 = static_cast<u8>(seed1 >> sh1); + seed2 = static_cast<u8>(seed2 >> sh2); + seed3 = static_cast<u8>(seed3 >> sh1); + seed4 = static_cast<u8>(seed4 >> sh2); + seed5 = static_cast<u8>(seed5 >> sh1); + seed6 = static_cast<u8>(seed6 >> sh2); + seed7 = static_cast<u8>(seed7 >> sh1); + seed8 = static_cast<u8>(seed8 >> sh2); + seed9 = static_cast<u8>(seed9 >> sh3); + seed10 = static_cast<u8>(seed10 >> sh3); + seed11 = static_cast<u8>(seed11 >> sh3); + seed12 = static_cast<u8>(seed12 >> sh3); + + s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); a &= 0x3F; b &= 0x3F; @@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, return 3; } -static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, - int32_t smallBlock) { +static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); } // Section C.2.14 -static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, - uint32_t colorEndpointMode) { +static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, + u32 colorEndpos32Mode) { #define READ_UINT_VALUES(N) \ - uint32_t v[N]; \ - for (uint32_t i = 0; i < N; i++) { \ + u32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ v[i] = *(colorValues++); \ } #define READ_INT_VALUES(N) \ - int32_t v[N]; \ - for (uint32_t i = 0; i < N; i++) { \ - v[i] = static_cast<int32_t>(*(colorValues++)); \ + s32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ + v[i] = static_cast<s32>(*(colorValues++)); \ } - switch (colorEndpointMode) { + switch (colorEndpos32Mode) { case 0: { READ_UINT_VALUES(2) ep1 = Pixel(0xFF, v[0], v[0], v[0]); @@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue case 1: { READ_UINT_VALUES(2) - uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); - uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); + u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); + u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); ep1 = Pixel(0xFF, L0, L0, L0); ep2 = Pixel(0xFF, L1, L1, L1); } break; @@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue } break; default: - assert(!"Unsupported color endpoint mode (is it HDR?)"); + assert(false && "Unsupported color endpoint mode (is it HDR?)"); break; } @@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue #undef READ_INT_VALUES } -static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, - const uint32_t blockHeight, uint32_t* outBuf) { +static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, + u32* outBuf) { InputBitStream strm(inBuf); TexelWeightParams weightParams = DecodeBlockInfo(strm); // Was there an error? if (weightParams.m_bError) { - assert(!"Invalid block mode"); + assert(false && "Invalid block mode"); FillError(outBuf, blockWidth, blockHeight); return; } @@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, } if (weightParams.m_bVoidExtentHDR) { - assert(!"HDR void extent blocks are unsupported!"); + assert(false && "HDR void extent blocks are unsupported!"); FillError(outBuf, blockWidth, blockHeight); return; } if (weightParams.m_Width > blockWidth) { - assert(!"Texel weight grid width should be smaller than block width"); + assert(false && "Texel weight grid width should be smaller than block width"); FillError(outBuf, blockWidth, blockHeight); return; } if (weightParams.m_Height > blockHeight) { - assert(!"Texel weight grid height should be smaller than block height"); + assert(false && "Texel weight grid height should be smaller than block height"); FillError(outBuf, blockWidth, blockHeight); return; } // Read num partitions - uint32_t nPartitions = strm.ReadBits(2) + 1; + u32 nPartitions = strm.ReadBits<2>() + 1; assert(nPartitions <= 4); if (nPartitions == 4 && weightParams.m_bDualPlane) { - assert(!"Dual plane mode is incompatible with four partition blocks"); + assert(false && "Dual plane mode is incompatible with four partition blocks"); FillError(outBuf, blockWidth, blockHeight); return; } - // Based on the number of partitions, read the color endpoint mode for + // Based on the number of partitions, read the color endpos32 mode for // each partition. - // Determine partitions, partition index, and color endpoint modes - int32_t planeIdx = -1; - uint32_t partitionIndex; - uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; + // Determine partitions, partition index, and color endpos32 modes + s32 planeIdx = -1; + u32 partitionIndex; + u32 colorEndpos32Mode[4] = {0, 0, 0, 0}; // Define color data. - uint8_t colorEndpointData[16]; - memset(colorEndpointData, 0, sizeof(colorEndpointData)); - OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); + u8 colorEndpos32Data[16]; + memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data)); + OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0); // Read extra config data... - uint32_t baseCEM = 0; + u32 baseCEM = 0; if (nPartitions == 1) { - colorEndpointMode[0] = strm.ReadBits(4); + colorEndpos32Mode[0] = strm.ReadBits<4>(); partitionIndex = 0; } else { - partitionIndex = strm.ReadBits(10); - baseCEM = strm.ReadBits(6); + partitionIndex = strm.ReadBits<10>(); + baseCEM = strm.ReadBits<6>(); } - uint32_t baseMode = (baseCEM & 3); + u32 baseMode = (baseCEM & 3); - // Remaining bits are color endpoint data... - uint32_t nWeightBits = weightParams.GetPackedBitSize(); - int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); + // Remaining bits are color endpos32 data... + u32 nWeightBits = weightParams.GetPackedBitSize(); + s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead()); // Consider extra bits prior to texel data... - uint32_t extraCEMbits = 0; + u32 extraCEMbits = 0; if (baseMode) { switch (nPartitions) { case 2: @@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, remainingBits -= extraCEMbits; // Do we have a dual plane situation? - uint32_t planeSelectorBits = 0; + u32 planeSelectorBits = 0; if (weightParams.m_bDualPlane) { planeSelectorBits = 2; } remainingBits -= planeSelectorBits; // Read color data... - uint32_t colorDataBits = remainingBits; + u32 colorDataBits = remainingBits; while (remainingBits > 0) { - uint32_t nb = std::min(remainingBits, 8); - uint32_t b = strm.ReadBits(nb); - colorEndpointStream.WriteBits(b, nb); + u32 nb = std::min(remainingBits, 8); + u32 b = strm.ReadBits(nb); + colorEndpos32Stream.WriteBits(b, nb); remainingBits -= 8; } @@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, // Read the rest of the CEM if (baseMode) { - uint32_t extraCEM = strm.ReadBits(extraCEMbits); - uint32_t CEM = (extraCEM << 6) | baseCEM; + u32 extraCEM = strm.ReadBits(extraCEMbits); + u32 CEM = (extraCEM << 6) | baseCEM; CEM >>= 2; bool C[4] = {0}; - for (uint32_t i = 0; i < nPartitions; i++) { + for (u32 i = 0; i < nPartitions; i++) { C[i] = CEM & 1; CEM >>= 1; } - uint8_t M[4] = {0}; - for (uint32_t i = 0; i < nPartitions; i++) { + u8 M[4] = {0}; + for (u32 i = 0; i < nPartitions; i++) { M[i] = CEM & 3; CEM >>= 2; assert(M[i] <= 3); } - for (uint32_t i = 0; i < nPartitions; i++) { - colorEndpointMode[i] = baseMode; + for (u32 i = 0; i < nPartitions; i++) { + colorEndpos32Mode[i] = baseMode; if (!(C[i])) - colorEndpointMode[i] -= 1; - colorEndpointMode[i] <<= 2; - colorEndpointMode[i] |= M[i]; + colorEndpos32Mode[i] -= 1; + colorEndpos32Mode[i] <<= 2; + colorEndpos32Mode[i] |= M[i]; } } else if (nPartitions > 1) { - uint32_t CEM = baseCEM >> 2; - for (uint32_t i = 0; i < nPartitions; i++) { - colorEndpointMode[i] = CEM; + u32 CEM = baseCEM >> 2; + for (u32 i = 0; i < nPartitions; i++) { + colorEndpos32Mode[i] = CEM; } } // Make sure everything up till here is sane. - for (uint32_t i = 0; i < nPartitions; i++) { - assert(colorEndpointMode[i] < 16); + for (u32 i = 0; i < nPartitions; i++) { + assert(colorEndpos32Mode[i] < 16); } assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); // Decode both color data and texel weight data - uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions - DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, + u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions + DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions, colorDataBits); - Pixel endpoints[4][2]; - const uint32_t* colorValuesPtr = colorValues; - for (uint32_t i = 0; i < nPartitions; i++) { - ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); + Pixel endpos32s[4][2]; + const u32* colorValuesPtr = colorValues; + for (u32 i = 0; i < nPartitions; i++) { + ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]); } // Read the texel weight data.. - uint8_t texelWeightData[16]; + u8 texelWeightData[16]; memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); // Reverse everything - for (uint32_t i = 0; i < 8; i++) { + for (u32 i = 0; i < 8; i++) { // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 - unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); - unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); + u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); + u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); #undef REVERSE_BYTE texelWeightData[i] = b; @@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, } // Make sure that higher non-texel bits are set to zero - const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; + const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; texelWeightData[clearByteStart - 1] = texelWeightData[clearByteStart - 1] & - static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); std::vector<IntegerEncodedValue> texelWeightValues; + texelWeightValues.reserve(64); + InputBitStream weightStream(texelWeightData); - IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, - weightParams.m_MaxWeight, - weightParams.GetNumWeightValues()); + DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, + weightParams.GetNumWeightValues()); // Blocks can be at most 12x12, so we can have as many as 144 weights - uint32_t weights[2][144]; + u32 weights[2][144]; UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); - // Now that we have endpoints and weights, we can interpolate and generate + // Now that we have endpos32s and weights, we can s32erpolate and generate // the proper decoding... - for (uint32_t j = 0; j < blockHeight; j++) - for (uint32_t i = 0; i < blockWidth; i++) { - uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, - (blockHeight * blockWidth) < 32); + for (u32 j = 0; j < blockHeight; j++) + for (u32 i = 0; i < blockWidth; i++) { + u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, + (blockHeight * blockWidth) < 32); assert(partition < nPartitions); Pixel p; - for (uint32_t c = 0; c < 4; c++) { - uint32_t C0 = endpoints[partition][0].Component(c); + for (u32 c = 0; c < 4; c++) { + u32 C0 = endpos32s[partition][0].Component(c); C0 = Replicate(C0, 8, 16); - uint32_t C1 = endpoints[partition][1].Component(c); + u32 C1 = endpos32s[partition][1].Component(c); C1 = Replicate(C1, 8, 16); - uint32_t plane = 0; + u32 plane = 0; if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { plane = 1; } - uint32_t weight = weights[plane][j * blockWidth + i]; - uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; + u32 weight = weights[plane][j * blockWidth + i]; + u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; if (C == 65535) { p.Component(c) = 255; } else { double Cf = static_cast<double>(C); - p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); + p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); } } @@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, namespace Tegra::Texture::ASTC { -std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, - uint32_t depth, uint32_t block_width, uint32_t block_height) { - uint32_t blockIdx = 0; +std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, + u32 block_height) { + u32 blockIdx = 0; std::size_t depth_offset = 0; - std::vector<uint8_t> outData(height * width * depth * 4); - for (uint32_t k = 0; k < depth; k++) { - for (uint32_t j = 0; j < height; j += block_height) { - for (uint32_t i = 0; i < width; i += block_width) { + std::vector<u8> outData(height * width * depth * 4); + for (u32 k = 0; k < depth; k++) { + for (u32 j = 0; j < height; j += block_height) { + for (u32 i = 0; i < width; i += block_width) { - const uint8_t* blockPtr = data + blockIdx * 16; + const u8* blockPtr = data + blockIdx * 16; // Blocks can be at most 12x12 - uint32_t uncompData[144]; + u32 uncompData[144]; ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); - uint32_t decompWidth = std::min(block_width, width - i); - uint32_t decompHeight = std::min(block_height, height - j); + u32 decompWidth = std::min(block_width, width - i); + u32 decompHeight = std::min(block_height, height - j); - uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; - for (uint32_t jj = 0; jj < decompHeight; jj++) { + u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; + for (u32 jj = 0; jj < decompHeight; jj++) { memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index c38860628..3b9ab38dd 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -539,7 +539,7 @@ void Config::ReadDebuggingValues() { void Config::ReadServiceValues() { qt_config->beginGroup(QStringLiteral("Services")); Settings::values.bcat_backend = - ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("boxcat")) + ReadSetting(QStringLiteral("bcat_backend"), QStringLiteral("null")) .toString() .toStdString(); Settings::values.bcat_boxcat_local = @@ -682,6 +682,8 @@ void Config::ReadSystemValues() { Settings::values.language_index = ReadSetting(QStringLiteral("language_index"), 1).toInt(); + Settings::values.region_index = ReadSetting(QStringLiteral("region_index"), 1).toInt(); + const auto rng_seed_enabled = ReadSetting(QStringLiteral("rng_seed_enabled"), false).toBool(); if (rng_seed_enabled) { Settings::values.rng_seed = ReadSetting(QStringLiteral("rng_seed"), 0).toULongLong(); @@ -698,6 +700,8 @@ void Config::ReadSystemValues() { Settings::values.custom_rtc = std::nullopt; } + Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt(); + qt_config->endGroup(); } @@ -1114,6 +1118,7 @@ void Config::SaveSystemValues() { WriteSetting(QStringLiteral("use_docked_mode"), Settings::values.use_docked_mode, false); WriteSetting(QStringLiteral("current_user"), Settings::values.current_user, 0); WriteSetting(QStringLiteral("language_index"), Settings::values.language_index, 1); + WriteSetting(QStringLiteral("region_index"), Settings::values.region_index, 1); WriteSetting(QStringLiteral("rng_seed_enabled"), Settings::values.rng_seed.has_value(), false); WriteSetting(QStringLiteral("rng_seed"), Settings::values.rng_seed.value_or(0), 0); @@ -1125,6 +1130,8 @@ void Config::SaveSystemValues() { Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), 0); + WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1); + qt_config->endGroup(); } diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index e1b52f8d9..f49cd4c8f 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp @@ -56,6 +56,8 @@ void ConfigureSystem::SetConfiguration() { enabled = !Core::System::GetInstance().IsPoweredOn(); ui->combo_language->setCurrentIndex(Settings::values.language_index); + ui->combo_region->setCurrentIndex(Settings::values.region_index); + ui->combo_sound->setCurrentIndex(Settings::values.sound_index); ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); @@ -81,6 +83,8 @@ void ConfigureSystem::ApplyConfiguration() { } Settings::values.language_index = ui->combo_language->currentIndex(); + Settings::values.region_index = ui->combo_region->currentIndex(); + Settings::values.sound_index = ui->combo_sound->currentIndex(); if (ui->rng_seed_checkbox->isChecked()) { Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); diff --git a/src/yuzu/configuration/configure_system.h b/src/yuzu/configuration/configure_system.h index 1eab3781d..d8fa2d2cc 100644 --- a/src/yuzu/configuration/configure_system.h +++ b/src/yuzu/configuration/configure_system.h @@ -36,5 +36,6 @@ private: bool enabled = false; int language_index = 0; + int region_index = 0; int sound_index = 0; }; diff --git a/src/yuzu/configuration/configure_system.ui b/src/yuzu/configuration/configure_system.ui index 65745a2f8..4e2c7e76e 100644 --- a/src/yuzu/configuration/configure_system.ui +++ b/src/yuzu/configuration/configure_system.ui @@ -22,14 +22,14 @@ <string>System Settings</string> </property> <layout class="QGridLayout" name="gridLayout"> - <item row="1" column="0"> + <item row="2" column="0"> <widget class="QLabel" name="label_sound"> <property name="text"> <string>Sound output mode</string> </property> </widget> </item> - <item row="2" column="0"> + <item row="3" column="0"> <widget class="QLabel" name="label_console_id"> <property name="text"> <string>Console ID:</string> @@ -128,14 +128,60 @@ </item> </widget> </item> - <item row="4" column="0"> + <item row="1" column="0"> + <widget class="QLabel" name="label_region"> + <property name="text"> + <string>Region:</string> + </property> + </widget> + </item> + <item row="1" column="1"> + <widget class="QComboBox" name="combo_region"> + <item> + <property name="text"> + <string>Japan</string> + </property> + </item> + <item> + <property name="text"> + <string>USA</string> + </property> + </item> + <item> + <property name="text"> + <string>Europe</string> + </property> + </item> + <item> + <property name="text"> + <string>Australia</string> + </property> + </item> + <item> + <property name="text"> + <string>China</string> + </property> + </item> + <item> + <property name="text"> + <string>Korea</string> + </property> + </item> + <item> + <property name="text"> + <string>Taiwan</string> + </property> + </item> + </widget> + </item> + <item row="5" column="0"> <widget class="QCheckBox" name="rng_seed_checkbox"> <property name="text"> <string>RNG Seed</string> </property> </widget> </item> - <item row="1" column="1"> + <item row="2" column="1"> <widget class="QComboBox" name="combo_sound"> <item> <property name="text"> @@ -161,7 +207,7 @@ </property> </widget> </item> - <item row="2" column="1"> + <item row="3" column="1"> <widget class="QPushButton" name="button_regenerate_console_id"> <property name="sizePolicy"> <sizepolicy hsizetype="Fixed" vsizetype="Fixed"> @@ -177,14 +223,14 @@ </property> </widget> </item> - <item row="3" column="0"> + <item row="4" column="0"> <widget class="QCheckBox" name="custom_rtc_checkbox"> <property name="text"> <string>Custom RTC</string> </property> </widget> </item> - <item row="3" column="1"> + <item row="4" column="1"> <widget class="QDateTimeEdit" name="custom_rtc_edit"> <property name="minimumDate"> <date> @@ -198,7 +244,7 @@ </property> </widget> </item> - <item row="4" column="1"> + <item row="5" column="1"> <widget class="QLineEdit" name="rng_seed_edit"> <property name="sizePolicy"> <sizepolicy hsizetype="Minimum" vsizetype="Fixed"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 907abaa51..f4cd905c9 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -452,7 +452,7 @@ void Config::ReadValues() { Settings::values.yuzu_token = sdl2_config->Get("WebService", "yuzu_token", ""); // Services - Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "boxcat"); + Settings::values.bcat_backend = sdl2_config->Get("Services", "bcat_backend", "null"); Settings::values.bcat_boxcat_local = sdl2_config->GetBoolean("Services", "bcat_boxcat_local", false); } |