summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/core/hle/kernel/errors.h2
-rw-r--r--src/core/hle/kernel/svc.cpp91
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp7
-rw-r--r--src/video_core/engines/shader_bytecode.h36
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp82
5 files changed, 203 insertions, 15 deletions
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
index e5fa67ae8..885259618 100644
--- a/src/core/hle/kernel/errors.h
+++ b/src/core/hle/kernel/errors.h
@@ -22,6 +22,7 @@ enum {
HandleTableFull = 105,
InvalidMemoryState = 106,
InvalidMemoryPermissions = 108,
+ InvalidMemoryRange = 110,
InvalidThreadPriority = 112,
InvalidProcessorId = 113,
InvalidHandle = 114,
@@ -56,6 +57,7 @@ constexpr ResultCode ERR_INVALID_ADDRESS(ErrorModule::Kernel, ErrCodes::InvalidA
constexpr ResultCode ERR_INVALID_ADDRESS_STATE(ErrorModule::Kernel, ErrCodes::InvalidMemoryState);
constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS(ErrorModule::Kernel,
ErrCodes::InvalidMemoryPermissions);
+constexpr ResultCode ERR_INVALID_MEMORY_RANGE(ErrorModule::Kernel, ErrCodes::InvalidMemoryRange);
constexpr ResultCode ERR_INVALID_HANDLE(ErrorModule::Kernel, ErrCodes::InvalidHandle);
constexpr ResultCode ERR_INVALID_PROCESSOR_ID(ErrorModule::Kernel, ErrCodes::InvalidProcessorId);
constexpr ResultCode ERR_INVALID_SIZE(ErrorModule::Kernel, ErrCodes::InvalidSize);
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 3afcce3fe..3e4dd61dc 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -39,6 +39,73 @@ namespace {
constexpr bool Is4KBAligned(VAddr address) {
return (address & 0xFFF) == 0;
}
+
+// Reports whether the end of the range (address + size) lies strictly above its start.
+// The result is false when the sum wraps around the 64-bit type, and also when
+// the given size is zero.
+constexpr bool IsValidAddressRange(VAddr address, u64 size) {
+    const auto range_end = address + size;
+    return address < range_end;
+}
+
+// Tests whether the range starting at address and spanning size bytes is fully
+// contained in [address_range_begin, address_range_end). Both upper bounds are
+// compared as inclusive last addresses.
+constexpr bool IsInsideAddressRange(VAddr address, u64 size, VAddr address_range_begin,
+                                    VAddr address_range_end) {
+    const VAddr last_address = address + size - 1;
+    const auto last_allowed_address = address_range_end - 1;
+    if (address < address_range_begin) {
+        return false;
+    }
+    return last_address <= last_allowed_address;
+}
+
+// Checks whether the given range fits between the address space base and end
+// addresses reported by the VM manager.
+bool IsInsideAddressSpace(const VMManager& vm, VAddr address, u64 size) {
+    const auto space_begin = vm.GetAddressSpaceBaseAddress();
+    const auto space_end = vm.GetAddressSpaceEndAddress();
+    return IsInsideAddressRange(address, size, space_begin, space_end);
+}
+
+// Checks whether the given range fits between the new map region base and end
+// addresses reported by the VM manager.
+bool IsInsideNewMapRegion(const VMManager& vm, VAddr address, u64 size) {
+    const auto region_begin = vm.GetNewMapRegionBaseAddress();
+    const auto region_end = vm.GetNewMapRegionEndAddress();
+    return IsInsideAddressRange(address, size, region_begin, region_end);
+}
+
+// Validates the arguments shared by svcMapMemory and svcUnmapMemory. Both SVCs
+// perform the same sanitizing in the same order, so the checks live in one place.
+//
+// Returns, in order of evaluation:
+//  - ERR_INVALID_ADDRESS       if dst_addr or src_addr is not 4KB-aligned
+//  - ERR_INVALID_SIZE          if size is zero or not 4KB-aligned
+//  - ERR_INVALID_ADDRESS_STATE if either range wraps around the address type, or
+//                              the source range is not inside the address space
+//  - ERR_INVALID_MEMORY_RANGE  if the destination range is not inside the new map
+//                              region, or overlaps the heap region or the map region
+//  - RESULT_SUCCESS            otherwise
+ResultCode MapUnmapMemorySanityChecks(const VMManager& vm_manager, VAddr dst_addr, VAddr src_addr,
+                                      u64 size) {
+    if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
+        return ERR_INVALID_ADDRESS;
+    }
+
+    if (size == 0 || !Is4KBAligned(size)) {
+        return ERR_INVALID_SIZE;
+    }
+
+    // Reject ranges whose end wraps around before doing any range arithmetic on them.
+    if (!IsValidAddressRange(dst_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsValidAddressRange(src_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsInsideAddressSpace(vm_manager, src_addr, size)) {
+        return ERR_INVALID_ADDRESS_STATE;
+    }
+
+    if (!IsInsideNewMapRegion(vm_manager, dst_addr, size)) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    // The destination must not overlap the heap region.
+    const VAddr dst_end_address = dst_addr + size;
+    if (dst_end_address > vm_manager.GetHeapRegionBaseAddress() &&
+        dst_addr < vm_manager.GetHeapRegionEndAddress()) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    // The destination must not overlap the map region either. The overlap test has
+    // to use the map region's own base: comparing against the new map region's base
+    // is always true here, because dst was just verified to lie inside that region.
+    if (dst_end_address > vm_manager.GetMapRegionBaseAddress() &&
+        dst_addr < vm_manager.GetMapRegionEndAddress()) {
+        return ERR_INVALID_MEMORY_RANGE;
+    }
+
+    return RESULT_SUCCESS;
+}
} // Anonymous namespace
/// Set the process heap to a given Size. It can both extend and shrink the heap.
@@ -69,15 +136,15 @@ static ResultCode MapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
src_addr, size);
- if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
- return ERR_INVALID_ADDRESS;
- }
+ auto* const current_process = Core::CurrentProcess();
+ const auto& vm_manager = current_process->VMManager();
- if (size == 0 || !Is4KBAligned(size)) {
- return ERR_INVALID_SIZE;
+ const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
+ if (result != RESULT_SUCCESS) {
+ return result;
}
- return Core::CurrentProcess()->MirrorMemory(dst_addr, src_addr, size);
+ return current_process->MirrorMemory(dst_addr, src_addr, size);
}
/// Unmaps a region that was previously mapped with svcMapMemory
@@ -85,15 +152,15 @@ static ResultCode UnmapMemory(VAddr dst_addr, VAddr src_addr, u64 size) {
LOG_TRACE(Kernel_SVC, "called, dst_addr=0x{:X}, src_addr=0x{:X}, size=0x{:X}", dst_addr,
src_addr, size);
- if (!Is4KBAligned(dst_addr) || !Is4KBAligned(src_addr)) {
- return ERR_INVALID_ADDRESS;
- }
+ auto* const current_process = Core::CurrentProcess();
+ const auto& vm_manager = current_process->VMManager();
- if (size == 0 || !Is4KBAligned(size)) {
- return ERR_INVALID_SIZE;
+ const auto result = MapUnmapMemorySanityChecks(vm_manager, dst_addr, src_addr, size);
+ if (result != RESULT_SUCCESS) {
+ return result;
}
- return Core::CurrentProcess()->UnmapMemory(dst_addr, src_addr, size);
+ return current_process->UnmapMemory(dst_addr, src_addr, size);
}
/// Connect to an OS service given the port name, returns the handle to the port to out
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 7555bbe7d..8d194e175 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -167,10 +167,11 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
auto& system_instance = Core::System::GetInstance();
// Remove this memory region from the rasterizer cache.
- system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(params.offset,
- itr->second.size);
-
auto& gpu = system_instance.GPU();
+ auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
+ ASSERT(cpu_addr);
+ system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size);
+
params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
buffer_mappings.erase(itr->second.offset);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 550ab1148..9a59b65b3 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -214,6 +214,18 @@ enum class IMinMaxExchange : u64 {
XHi = 3,
};
+enum class VmadType : u64 { // Operand part selector for VMAD; consulted by the GLSL decompiler's VMAD case
+ Size16_Low = 0, // decompiler masks the operand with 0xffff
+ Size16_High = 1, // decompiler shifts the operand right by 16
+ Size32 = 2, // not implemented by the decompiler (hits UNREACHABLE_MSG)
+ Invalid = 3, // decompiler forces the whole instruction result to zero
+};
+
+enum class VmadShr : u64 { // Post-operation shift for VMAD; unlisted encodings apply no shift in the decompiler
+ Shr7 = 1, // decompiler shifts the result right by 7
+ Shr15 = 2, // decompiler shifts the result right by 15
+};
+
enum class XmadMode : u64 {
None = 0,
CLo = 1,
@@ -452,6 +464,7 @@ union Instruction {
BitField<48, 16, u64> opcode;
union {
+ BitField<20, 16, u64> imm20_16;
BitField<20, 19, u64> imm20_19;
BitField<20, 32, s64> imm20_32;
BitField<45, 1, u64> negate_b;
@@ -493,6 +506,10 @@ union Instruction {
}
} lop3;
+    // Returns the imm20_16 bit field converted to a 16-bit unsigned value.
+    u16 GetImm20_16() const {
+        const auto immediate = static_cast<u16>(imm20_16);
+        return immediate;
+    }
+
u32 GetImm20_19() const {
u32 imm{static_cast<u32>(imm20_19)};
imm <<= 12;
@@ -1017,6 +1034,23 @@ union Instruction {
} isberd;
union { // Bit layout of the VMAD instruction fields read by the GLSL decompiler
+ BitField<48, 1, u64> signed_a; // when set, the decompiler wraps operand A in int(...)
+ BitField<38, 1, u64> is_byte_chunk_a; // when 0, the decompiler extracts one byte of A chosen by byte_height_a
+ BitField<36, 2, VmadType> type_a; // same bits as byte_height_a, read as a VmadType
+ BitField<36, 2, u64> byte_height_a; // same bits as type_a, read as a byte index (shift = index * 8)
+
+ BitField<49, 1, u64> signed_b; // when set, operand B (register or immediate) is treated as signed
+ BitField<50, 1, u64> use_register_b; // when set, B comes from gpr20; otherwise from the 16-bit immediate
+ BitField<30, 1, u64> is_byte_chunk_b; // when 0, the decompiler extracts one byte of B chosen by byte_height_b
+ BitField<28, 2, VmadType> type_b; // same bits as byte_height_b, read as a VmadType
+ BitField<28, 2, u64> byte_height_b; // same bits as type_b, read as a byte index (shift = index * 8)
+
+ BitField<51, 2, VmadShr> shr; // optional right shift applied to the computed result
+ BitField<55, 1, u64> saturate; // Saturates the result (a * b + c)
+ BitField<47, 1, u64> cc; // forwarded to SetRegisterToInteger; presumably the condition-code flag - TODO confirm
} vmad;
+
+ union {
BitField<20, 16, u64> imm20_16;
BitField<36, 1, u64> product_shift_left;
BitField<37, 1, u64> merge_37;
@@ -1083,6 +1117,7 @@ public:
IPA,
OUT_R, // Emit vertex/primitive
ISBERD,
+ VMAD,
FFMA_IMM, // Fused Multiply and Add
FFMA_CR,
FFMA_RC,
@@ -1320,6 +1355,7 @@ private:
INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"),
+ INST("01011111--------", Id::VMAD, Type::Trivial, "VMAD"),
INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c82a0dcfa..8dfb49507 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2953,6 +2953,88 @@ private:
LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
break;
}
+ case OpCode::Id::VMAD: { // Emits GLSL for VMAD: (a * b + c), optionally shifted right, written to gpr0
+ const bool signed_a = instr.vmad.signed_a == 1;
+ const bool signed_b = instr.vmad.signed_b == 1;
+ const bool result_signed = signed_a || signed_b; // signed if either multiplicand is signed
+ boost::optional<std::string> forced_result; // set by Unpack for the Invalid type; overrides the expression
+
+ auto Unpack = [&](const std::string& op, bool is_chunk, bool is_signed, // builds the GLSL for one operand
+ Tegra::Shader::VmadType type, u64 byte_height) {
+ const std::string value = [&]() {
+ if (!is_chunk) { // single byte: shift right by byte_height * 8, then mask with 0xff
+ const auto offset = static_cast<u32>(byte_height * 8);
+ return "((" + op + " >> " + std::to_string(offset) + ") & 0xff)";
+ }
+ const std::string zero = "0";
+
+ switch (type) {
+ case Tegra::Shader::VmadType::Size16_Low:
+ return '(' + op + " & 0xffff)";
+ case Tegra::Shader::VmadType::Size16_High:
+ return '(' + op + " >> 16)";
+ case Tegra::Shader::VmadType::Size32:
+ // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when
+ // this type is used (1 * 1 + 0 == 0x5b800000). Until a better
+ // explanation is found: assert.
+ UNREACHABLE_MSG("Unimplemented");
+ return zero;
+ case Tegra::Shader::VmadType::Invalid:
+ // Note(Rodrigo): This flag is invalid according to nvdisasm. From my
+ // testing (even though it's invalid) this makes the whole instruction
+ // assign zero to target register.
+ forced_result = boost::make_optional(zero);
+ return zero;
+ default:
+ UNREACHABLE();
+ return zero;
+ }
+ }();
+
+ if (is_signed) { // signed operands are wrapped in a GLSL int(...) conversion
+ return "int(" + value + ')';
+ }
+ return value;
+ };
+
+ const std::string op_a = Unpack(regs.GetRegisterAsInteger(instr.gpr8, 0, false),
+ instr.vmad.is_byte_chunk_a != 0, signed_a,
+ instr.vmad.type_a, instr.vmad.byte_height_a);
+
+ std::string op_b;
+ if (instr.vmad.use_register_b) {
+ op_b = Unpack(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+ instr.vmad.is_byte_chunk_b != 0, signed_b, instr.vmad.type_b,
+ instr.vmad.byte_height_b);
+ } else { // operand B is the 16-bit immediate, reinterpreted as s16 when signed_b is set
+ op_b = '(' +
+ std::to_string(signed_b ? static_cast<s16>(instr.alu.GetImm20_16())
+ : instr.alu.GetImm20_16()) +
+ ')';
+ }
+
+ const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39, 0, result_signed); // addend from gpr39
+
+ std::string result;
+ if (forced_result) {
+ result = *forced_result;
+ } else {
+ result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+
+ switch (instr.vmad.shr) { // shr values other than Shr7/Shr15 leave the result unshifted
+ case Tegra::Shader::VmadShr::Shr7:
+ result = '(' + result + " >> 7)";
+ break;
+ case Tegra::Shader::VmadShr::Shr15:
+ result = '(' + result + " >> 15)";
+ break;
+ }
+ }
+ regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
+ instr.vmad.saturate == 1, 0, Register::Size::Word,
+ instr.vmad.cc);
+ break;
+ }
default: {
LOG_CRITICAL(HW_GPU, "Unhandled instruction: {}", opcode->GetName());
UNREACHABLE();