diff options
-rw-r--r-- | src/core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 7 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp | 32 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec.h | 38 | ||||
-rw-r--r-- | src/core/hle/service/nvdrv/nvdrv.cpp | 2 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 39 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 7 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 65 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 6 |
10 files changed, 188 insertions, 16 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 7ab60c5bc..aff1d2180 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -175,6 +175,8 @@ add_library(core STATIC hle/service/nvdrv/devices/nvhost_ctrl_gpu.h hle/service/nvdrv/devices/nvhost_gpu.cpp hle/service/nvdrv/devices/nvhost_gpu.h + hle/service/nvdrv/devices/nvhost_nvdec.cpp + hle/service/nvdrv/devices/nvhost_nvdec.h hle/service/nvdrv/devices/nvmap.cpp hle/service/nvdrv/devices/nvmap.h hle/service/nvdrv/interface.cpp diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0075e4a0f..cffa7ca83 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -133,8 +133,11 @@ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { auto lock_owner = thread->lock_owner; // Threads waking up by timeout from WaitProcessWideKey do not perform priority inheritance - // and don't have a lock owner. - ASSERT(lock_owner == nullptr); + // and don't have a lock owner unless SignalProcessWideKey was called first and the thread + // wasn't awakened due to the mutex already being acquired. + if (lock_owner) { + lock_owner->RemoveMutexWaiter(thread); + } } if (resume) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp new file mode 100644 index 000000000..0b6c22898 --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -0,0 +1,32 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" + +namespace Service::Nvidia::Devices { + +u32 nvhost_nvdec::ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) { + NGLOG_DEBUG(Service_NVDRV, "called, command=0x{:08X}, input_size=0x{:X}, output_size=0x{:X}", + command.raw, input.size(), output.size()); + + switch (static_cast<IoctlCommand>(command.raw)) { + case IoctlCommand::IocSetNVMAPfdCommand: + return SetNVMAPfd(input, output); + } + + UNIMPLEMENTED_MSG("Unimplemented ioctl"); + return 0; +} + +u32 nvhost_nvdec::SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output) { + IoctlSetNvmapFD params{}; + std::memcpy(¶ms, input.data(), input.size()); + NGLOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); + nvmap_fd = params.nvmap_fd; + return 0; +} + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h new file mode 100644 index 000000000..0192aecdd --- /dev/null +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h @@ -0,0 +1,38 @@ +// Copyright 2018 yuzu emulator team +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <cstdlib> +#include <cstring> +#include <vector> +#include "common/common_types.h" +#include "core/hle/service/nvdrv/devices/nvdevice.h" + +namespace Service::Nvidia::Devices { + +class nvhost_nvdec final : public nvdevice { +public: + nvhost_nvdec() = default; + ~nvhost_nvdec() override = default; + + u32 ioctl(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override; + +private: + enum class IoctlCommand : u32_le { + IocSetNVMAPfdCommand = 0x40044801, + }; + + struct IoctlSetNvmapFD { + u32_le nvmap_fd; + }; + static_assert(sizeof(IoctlSetNvmapFD) == 4, "IoctlSetNvmapFD is incorrect size"); + + u32_le nvmap_fd{}; + + u32 SetNVMAPfd(const std::vector<u8>& input, std::vector<u8>& output); +}; + +} // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index a6a4ab7d3..cc5cfe34e 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -9,6 +9,7 @@ #include "core/hle/service/nvdrv/devices/nvhost_ctrl.h" #include "core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h" #include "core/hle/service/nvdrv/devices/nvhost_gpu.h" +#include "core/hle/service/nvdrv/devices/nvhost_nvdec.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/hle/service/nvdrv/interface.h" #include "core/hle/service/nvdrv/nvdrv.h" @@ -36,6 +37,7 @@ Module::Module() { devices["/dev/nvmap"] = nvmap_dev; devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(nvmap_dev); devices["/dev/nvhost-ctrl"] = std::make_shared<Devices::nvhost_ctrl>(); + devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(); } u32 Module::Open(std::string device_name) { diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index f32a17057..da64430e9 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -261,6 +261,33 @@ union Instruction { BitField<50, 1, u64> saturate_a; } conversion; + union { + BitField<31, 4, u64> component_mask; + + bool IsComponentEnabled(size_t component) const { + return ((1 << component) & component_mask) != 0; + } + } tex; + + union { + BitField<50, 3, u64> component_mask_selector; + BitField<28, 8, Register> gpr28; + + bool HasTwoDestinations() const { + return gpr28.Value() != Register::ZeroIndex; + } + + bool IsComponentEnabled(size_t component) const { + static constexpr std::array<size_t, 5> one_dest_mask{0x1, 0x2, 0x4, 0x8, 0x3}; + static constexpr std::array<size_t, 5> two_dest_mask{0x7, 0xb, 0xd, 0xe, 0xf}; + const auto& mask{HasTwoDestinations() ? two_dest_mask : one_dest_mask}; + + ASSERT(component_mask_selector < mask.size()); + + return ((1 << component) & mask[component_mask_selector]) != 0; + } + } texs; + BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; @@ -281,6 +308,7 @@ public: KIL, LD_A, ST_A, + TEX, TEXQ, // Texture Query TEXS, // Texture Fetch with scalar/non-vec4 source/destinations TLDS, // Texture Load with scalar/non-vec4 source/destinations @@ -297,8 +325,10 @@ public: FMUL_R, FMUL_IMM, FMUL32_IMM, - MUFU, // Multi-Function Operator - RRO, // Range Reduction Operator + MUFU, // Multi-Function Operator + RRO_C, // Range Reduction Operator + RRO_R, + RRO_IMM, F2F_C, F2F_R, F2F_IMM, @@ -442,6 +472,7 @@ private: INST("111000110011----", Id::KIL, Type::Flow, "KIL"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1100000000111---", Id::TEX, Type::Memory, "TEX"), INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"), INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"), INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), @@ -459,7 +490,9 @@ private: INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"), INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), - INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"), + INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), + INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), + INST("0011100-10010---", Id::RRO_IMM, Type::Arithmetic, "RRO_IMM"), INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"), INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"), INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 10a460a82..65d643447 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -53,6 +53,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 + {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 }}; static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { @@ -113,7 +114,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>, MortonCopy<true, PixelFormat::R8>, MortonCopy<true, PixelFormat::RGBA16F>, MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>, - MortonCopy<true, PixelFormat::DXT45>, + MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>, }; static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr, @@ -126,7 +127,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra: MortonCopy<false, PixelFormat::A1B5G5R5>, MortonCopy<false, PixelFormat::R8>, MortonCopy<false, PixelFormat::RGBA16F>, - // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported + // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported + nullptr, nullptr, nullptr, nullptr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 07461017d..6f08678ab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -63,6 +63,7 @@ struct SurfaceParams { DXT1 = 6, DXT23 = 7, DXT45 = 8, + DXN1 = 9, // This is also known as BC4 Max, Invalid = 255, @@ -107,6 +108,7 @@ struct SurfaceParams { 4, // DXT1 4, // DXT23 4, // DXT45 + 4, // DXN1 }}; ASSERT(static_cast<size_t>(format) < compression_factor_table.size()); @@ -130,6 +132,7 @@ struct SurfaceParams { 64, // DXT1 128, // DXT23 128, // DXT45 + 64, // DXN1 }}; ASSERT(static_cast<size_t>(format) < bpp_table.size()); @@ -185,6 +188,8 @@ struct SurfaceParams { return PixelFormat::DXT23; case Tegra::Texture::TextureFormat::DXT45: return PixelFormat::DXT45; + case Tegra::Texture::TextureFormat::DXN1: + return PixelFormat::DXN1; default: NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format)); UNREACHABLE(); @@ -212,6 +217,8 @@ struct SurfaceParams { return Tegra::Texture::TextureFormat::DXT23; case PixelFormat::DXT45: return Tegra::Texture::TextureFormat::DXT45; + case PixelFormat::DXN1: + return Tegra::Texture::TextureFormat::DXN1; default: UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 68efe74b8..bb5209a7e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -792,8 +792,13 @@ private: 1, 1); break; } - case OpCode::Id::RRO: { - NGLOG_DEBUG(HW_GPU, "Skipping RRO instruction"); + case OpCode::Id::RRO_C: + case OpCode::Id::RRO_R: + case OpCode::Id::RRO_IMM: { + // Currently RRO is only implemented as a register move. + // Usage of `abs_b` and `negate_b` here should also be correct. + regs.SetRegisterToFloat(instr.gpr0, 0, op_b, 1, 1); + NGLOG_WARNING(HW_GPU, "RRO instruction is incomplete"); break; } default: { @@ -891,10 +896,10 @@ private: instr.gpr0); break; } - case OpCode::Id::TEXS: { + case OpCode::Id::TEX: { ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); - const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1); const std::string sampler = GetSampler(instr.sampler); const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; // Add an extra scope and declare the texture coords inside to prevent overwriting @@ -903,8 +908,52 @@ private: ++shader.scope; shader.AddLine(coord); const std::string texture = "texture(" + sampler + ", coords)"; - for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) { - regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, elem); + + size_t dest_elem{}; + for (size_t elem = 0; elem < instr.attribute.fmt20.size; ++elem) { + if (!instr.tex.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(instr.gpr0, elem, texture, 1, 4, false, dest_elem); + ++dest_elem; + } + --shader.scope; + shader.AddLine("}"); + break; + } + case OpCode::Id::TEXS: { + ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested"); + const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); + const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20); + const std::string sampler = GetSampler(instr.sampler); + const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");"; + // Add an extra scope and declare the texture coords inside to prevent + // overwriting them in case they are used as outputs of the texs instruction. + shader.AddLine("{"); + ++shader.scope; + shader.AddLine(coord); + const std::string texture = "texture(" + sampler + ", coords)"; + + // TEXS has two destination registers. RG goes into gpr0+0 and gpr0+1, and BA goes + // into gpr28+0 and gpr28+1 + size_t offset{}; + + for (const auto& dest : {instr.gpr0.Value(), instr.gpr28.Value()}) { + for (unsigned elem = 0; elem < 2; ++elem) { + if (!instr.texs.IsComponentEnabled(elem)) { + // Skip disabled components + continue; + } + regs.SetRegisterToFloat(dest, elem + offset, texture, 1, 4, false, elem); + } + + if (!instr.texs.HasTwoDestinations()) { + // Skip the second destination + break; + } + + offset += 2; } --shader.scope; shader.AddLine("}"); @@ -961,8 +1010,8 @@ private: '(' + predicate + ") " + combiner + " (" + second_pred + ')'); if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { - // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if - // enabled + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled SetPredicate(instr.fsetp.pred0, "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); } diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 0179663e8..2d2af5554 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -46,6 +46,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_ u32 BytesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::DXT1: + case TextureFormat::DXN1: // In this case a 'pixel' actually refers to a 4x4 tile. return 8; case TextureFormat::DXT23: @@ -79,7 +80,9 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, case TextureFormat::DXT1: case TextureFormat::DXT23: case TextureFormat::DXT45: - // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values. + case TextureFormat::DXN1: + // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel + // values. CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data, unswizzled_data.data(), true, block_height); break; @@ -109,6 +112,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat case TextureFormat::DXT1: case TextureFormat::DXT23: case TextureFormat::DXT45: + case TextureFormat::DXN1: case TextureFormat::A8R8G8B8: case TextureFormat::A2B10G10R10: case TextureFormat::A1B5G5R5: |