diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 7 | ||||
-rw-r--r-- | src/video_core/engines/fermi_2d.h | 14 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 24 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 36 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 35 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 9 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 82 | ||||
-rw-r--r-- | src/video_core/textures/texture.h | 17 |
8 files changed, 203 insertions, 21 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 7555bbe7d..8d194e175 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -167,10 +167,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou auto& system_instance = Core::System::GetInstance(); // Remove this memory region from the rasterizer cache. - system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset, - itr->second.size); - auto& gpu = system_instance.GPU(); + auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); + ASSERT(cpu_addr); + system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); + params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); buffer_mappings.erase(itr->second.offset); diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 81d15c62a..2a6e8bbbb 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -36,9 +36,9 @@ public: RenderTargetFormat format; BitField<0, 1, u32> linear; union { - BitField<0, 4, u32> block_depth; + BitField<0, 4, u32> block_width; BitField<4, 4, u32> block_height; - BitField<8, 4, u32> block_width; + BitField<8, 4, u32> block_depth; }; u32 depth; u32 layer; @@ -53,10 +53,20 @@ public: address_low); } + u32 BlockWidth() const { + // The block width is stored in log2 format. + return 1 << block_width; + } + u32 BlockHeight() const { // The block height is stored in log2 format. return 1 << block_height; } + + u32 BlockDepth() const { + // The block depth is stored in log2 format. + return 1 << block_depth; + } }; static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size"); diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 20e1884da..c8d1b6478 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -347,6 +347,16 @@ public: DecrWrap = 8, }; + enum class MemoryLayout : u32 { + Linear = 0, + BlockLinear = 1, + }; + + enum class InvMemoryLayout : u32 { + BlockLinear = 0, + Linear = 1, + }; + struct Cull { enum class FrontFace : u32 { ClockWise = 0x0900, @@ -432,7 +442,12 @@ public: u32 width; u32 height; Tegra::RenderTargetFormat format; - u32 block_dimensions; + union { + BitField<0, 3, u32> block_width; + BitField<4, 3, u32> block_height; + BitField<8, 3, u32> block_depth; + BitField<12, 1, InvMemoryLayout> type; + } memory_layout; u32 array_mode; u32 layer_stride; u32 base_layer; @@ -562,7 +577,12 @@ public: u32 address_high; u32 address_low; Tegra::DepthFormat format; - u32 block_dimensions; + union { + BitField<0, 4, u32> block_width; + BitField<4, 4, u32> block_height; + BitField<8, 4, u32> block_depth; + BitField<20, 1, InvMemoryLayout> type; + } memory_layout; u32 layer_stride; GPUVAddr Address() const { diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 550ab1148..9a59b65b3 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 { XHi = 3, }; +enum class VmadType : u64 { + Size16_Low = 0, + Size16_High = 1, + Size32 = 2, + Invalid = 3, +}; + +enum class VmadShr : u64 { + Shr7 = 1, + Shr15 = 2, +}; + enum class XmadMode : u64 { None = 0, CLo = 1, @@ -452,6 +464,7 @@ union Instruction { BitField<48, 16, u64> opcode; union { + BitField<20, 16, u64> imm20_16; BitField<20, 19, u64> imm20_19; BitField<20, 32, s64> imm20_32; BitField<45, 1, u64> negate_b; @@ -493,6 +506,10 @@ union Instruction { } } lop3; + u16 GetImm20_16() const { + return static_cast<u16>(imm20_16); + } + u32 GetImm20_19() const { u32 imm{static_cast<u32>(imm20_19)}; imm <<= 12; @@ -1017,6 +1034,23 @@ union Instruction { } isberd; union { + BitField<48, 1, u64> signed_a; + BitField<38, 1, u64> is_byte_chunk_a; + BitField<36, 2, VmadType> type_a; + BitField<36, 2, u64> byte_height_a; + + BitField<49, 1, u64> signed_b; + BitField<50, 1, u64> use_register_b; + BitField<30, 1, u64> is_byte_chunk_b; + BitField<28, 2, VmadType> type_b; + BitField<28, 2, u64> byte_height_b; + + BitField<51, 2, VmadShr> shr; + BitField<55, 1, u64> saturate; // Saturates the result (a * b + c) + BitField<47, 1, u64> cc; + } vmad; + + union { BitField<20, 16, u64> imm20_16; BitField<36, 1, u64> product_shift_left; BitField<37, 1, u64> merge_37; @@ -1083,6 +1117,7 @@ public: IPA, OUT_R, // Emit vertex/primitive ISBERD, + VMAD, FFMA_IMM, // Fused Multiply and Add FFMA_CR, FFMA_RC, @@ -1320,6 +1355,7 @@ private: INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), + INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"), INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"), INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"), INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 56ff83eff..65a220c41 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -45,7 +45,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { SurfaceParams params{}; params.addr = TryGetCpuAddr(config.tic.Address()); params.is_tiled = config.tic.IsTiled(); + params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, + params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value()); params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); @@ -97,8 +99,11 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; SurfaceParams params{}; params.addr = TryGetCpuAddr(config.Address()); - params.is_tiled = true; - params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.is_tiled = + config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.block_width = 1 << config.memory_layout.block_width; + params.block_height = 1 << config.memory_layout.block_height; + params.block_depth = 1 << config.memory_layout.block_depth; params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); @@ -120,13 +125,16 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { return params; } -/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, - Tegra::GPUVAddr zeta_address, - Tegra::DepthFormat format) { +/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( + u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { SurfaceParams params{}; params.addr = TryGetCpuAddr(zeta_address); - params.is_tiled = true; - params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight; + params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; + params.block_width = 1 << std::min(block_width, 5U); + params.block_height = 1 << std::min(block_height, 5U); + params.block_depth = 1 << std::min(block_depth, 5U); params.pixel_format = PixelFormatFromDepthFormat(format); params.component_type = ComponentTypeFromDepthFormat(format); params.type = GetFormatType(params.pixel_format); @@ -148,7 +156,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) { SurfaceParams params{}; params.addr = TryGetCpuAddr(config.Address()); params.is_tiled = !config.linear; - params.block_height = params.is_tiled ? config.BlockHeight() : 0, + params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, + params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, + params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); params.component_type = ComponentTypeFromRenderTarget(config.format); params.type = GetFormatType(params.pixel_format); @@ -818,6 +828,11 @@ void CachedSurface::LoadGLBuffer() { if (params.is_tiled) { gl_buffer.resize(total_size); + ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", + params.block_width, static_cast<u32>(params.target)); + ASSERT_MSG(params.block_depth == 1, "Block depth is defined as {} on texture type {}", + params.block_depth, static_cast<u32>(params.target)); + // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do // this for 3D textures, etc. switch (params.target) { @@ -989,7 +1004,9 @@ Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { } SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( - regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)}; + regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format, + regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, + regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; return GetSurface(depth_params, preserve_contents); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 0b4940b3c..66d98ad4e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -716,9 +716,10 @@ struct SurfaceParams { static SurfaceParams CreateForFramebuffer(std::size_t index); /// Creates SurfaceParams for a depth buffer configuration - static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height, - Tegra::GPUVAddr zeta_address, - Tegra::DepthFormat format); + static SurfaceParams CreateForDepthBuffer( + u32 zeta_width, u32 zeta_height, Tegra::GPUVAddr zeta_address, Tegra::DepthFormat format, + u32 block_width, u32 block_height, u32 block_depth, + Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); /// Creates SurfaceParams for a Fermi2D surface copy static SurfaceParams CreateForFermiCopySurface( @@ -733,7 +734,9 @@ struct SurfaceParams { VAddr addr; bool is_tiled; + u32 block_width; u32 block_height; + u32 block_depth; PixelFormat pixel_format; ComponentType component_type; SurfaceType type; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c82a0dcfa..8dfb49507 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2953,6 +2953,88 @@ private: LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed"); break; } + case OpCode::Id::VMAD: { + const bool signed_a = instr.vmad.signed_a == 1; + const bool signed_b = instr.vmad.signed_b == 1; + const bool result_signed = signed_a || signed_b; + boost::optional<std::string> forced_result; + + auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed, + Tegra::Shader::VmadType type, u64 byte_height) { + const std::string value = [&]() { + if (!is_chunk) { + const auto offset = static_cast<u32>(byte_height * 8); + return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)"; + } + const std::string zero = "0"; + + switch (type) { + case Tegra::Shader::VmadType::Size16_Low: + return '(' + op + " & 0xffff)"; + case Tegra::Shader::VmadType::Size16_High: + return '(' + op + " >> 16)"; + case Tegra::Shader::VmadType::Size32: + // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when + // this type is used (1 * 1 + 0 == 0x5b800000). Until a better + // explanation is found: assert. + UNREACHABLE_MSG("Unimplemented"); + return zero; + case Tegra::Shader::VmadType::Invalid: + // Note(Rodrigo): This flag is invalid according to nvdisasm. From my + // testing (even though it's invalid) this makes the whole instruction + // assign zero to target register. + forced_result = boost::make_optional(zero); + return zero; + default: + UNREACHABLE(); + return zero; + } + }(); + + if (is_signed) { + return "int(" + value + ')'; + } + return value; + }; + + const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false), + instr.vmad.is_byte_chunk_a != 0, signed_a, + instr.vmad.type_a, instr.vmad.byte_height_a); + + std::string op_b; + if (instr.vmad.use_register_b) { + op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false), + instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b, + instr.vmad.byte_height_b); + } else { + op_b = '(' + + std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16()) + : instr.alu.GetImm20_16()) + + ')'; + } + + const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); + + std::string result; + if (forced_result) { + result = *forced_result; + } else { + result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; + + switch (instr.vmad.shr) { + case Tegra::Shader::VmadShr::Shr7: + result = '(' + result + " >> 7)"; + break; + case Tegra::Shader::VmadShr::Shr15: + result = '(' + result + " >> 15)"; + break; + } + } + regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1, + instr.vmad.saturate == 1, 0, Register::Size::Word, + instr.vmad.cc); + break; + } default: { LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName()); UNREACHABLE(); diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 8f31d825a..58d17abcb 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -161,7 +161,9 @@ struct TICEntry { BitField<21, 3, TICHeaderVersion> header_version; }; union { + BitField<0, 3, u32> block_width; BitField<3, 3, u32> block_height; + BitField<6, 3, u32> block_depth; // High 16 bits of the pitch value BitField<0, 16, u32> pitch_high; @@ -202,13 +204,24 @@ struct TICEntry { return depth_minus_1 + 1; } + u32 BlockWidth() const { + ASSERT(IsTiled()); + // The block height is stored in log2 format. + return 1 << block_width; + } + u32 BlockHeight() const { - ASSERT(header_version == TICHeaderVersion::BlockLinear || - header_version == TICHeaderVersion::BlockLinearColorKey); + ASSERT(IsTiled()); // The block height is stored in log2 format. return 1 << block_height; } + u32 BlockDepth() const { + ASSERT(IsTiled()); + // The block height is stored in log2 format. + return 1 << block_depth; + } + bool IsTiled() const { return header_version == TICHeaderVersion::BlockLinear || header_version == TICHeaderVersion::BlockLinearColorKey; |