summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/core/hle/kernel/k_page_table.cpp159
-rw-r--r--src/core/hle/kernel/k_page_table.h4
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp580
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py92
-rw-r--r--src/video_core/engines/maxwell_dma.cpp20
-rw-r--r--src/video_core/engines/maxwell_dma.h2
6 files changed, 757 insertions, 100 deletions
diff --git a/src/core/hle/kernel/k_page_table.cpp b/src/core/hle/kernel/k_page_table.cpp
index dfea0b6e2..0602de1f7 100644
--- a/src/core/hle/kernel/k_page_table.cpp
+++ b/src/core/hle/kernel/k_page_table.cpp
@@ -285,72 +285,141 @@ ResultCode KPageTable::MapProcessCode(VAddr addr, std::size_t num_pages, KMemory
return ResultSuccess;
}
-ResultCode KPageTable::MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
+ // Validate the mapping request.
+ R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
+ ResultInvalidMemoryRegion);
+
+ // Lock the table.
KScopedLightLock lk(general_lock);
- const std::size_t num_pages{size / PageSize};
+ // Verify that the source memory is normal heap.
+ KMemoryState src_state{};
+ KMemoryPermission src_perm{};
+ std::size_t num_src_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(&src_state, &src_perm, nullptr, &num_src_allocator_blocks,
+ src_address, size, KMemoryState::All, KMemoryState::Normal,
+ KMemoryPermission::All, KMemoryPermission::UserReadWrite,
+ KMemoryAttribute::All, KMemoryAttribute::None));
- KMemoryState state{};
- KMemoryPermission perm{};
- CASCADE_CODE(CheckMemoryState(&state, &perm, nullptr, nullptr, src_addr, size,
- KMemoryState::All, KMemoryState::Normal, KMemoryPermission::All,
- KMemoryPermission::UserReadWrite, KMemoryAttribute::Mask,
- KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
+ // Verify that the destination memory is unmapped.
+ std::size_t num_dst_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(&num_dst_allocator_blocks, dst_address, size, KMemoryState::All,
+ KMemoryState::Free, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::None,
+ KMemoryAttribute::None));
- if (IsRegionMapped(dst_addr, size)) {
- return ResultInvalidCurrentMemory;
- }
+ // Map the code memory.
+ {
+ // Determine the number of pages being operated on.
+ const std::size_t num_pages = size / PageSize;
- KPageLinkedList page_linked_list;
- AddRegionToPages(src_addr, num_pages, page_linked_list);
+ // Create page groups for the memory being mapped.
+ KPageLinkedList pg;
+ AddRegionToPages(src_address, num_pages, pg);
- {
- auto block_guard = detail::ScopeExit(
- [&] { Operate(src_addr, num_pages, perm, OperationType::ChangePermissions); });
+ // Reprotect the source as kernel-read/not mapped.
+ const auto new_perm = static_cast<KMemoryPermission>(KMemoryPermission::KernelRead |
+ KMemoryPermission::NotMapped);
+ R_TRY(Operate(src_address, num_pages, new_perm, OperationType::ChangePermissions));
- CASCADE_CODE(Operate(src_addr, num_pages, KMemoryPermission::None,
- OperationType::ChangePermissions));
- CASCADE_CODE(MapPages(dst_addr, page_linked_list, KMemoryPermission::None));
+ // Ensure that we unprotect the source pages on failure.
+ auto unprot_guard = SCOPE_GUARD({
+ ASSERT(this->Operate(src_address, num_pages, src_perm, OperationType::ChangePermissions)
+ .IsSuccess());
+ });
- block_guard.Cancel();
- }
+ // Map the alias pages.
+ R_TRY(MapPages(dst_address, pg, new_perm));
- block_manager->Update(src_addr, num_pages, state, KMemoryPermission::None,
- KMemoryAttribute::Locked);
- block_manager->Update(dst_addr, num_pages, KMemoryState::AliasCode);
+ // We successfully mapped the alias pages, so we don't need to unprotect the src pages on
+ // failure.
+ unprot_guard.Cancel();
+
+ // Apply the memory block updates.
+ block_manager->Update(src_address, num_pages, src_state, new_perm,
+ KMemoryAttribute::Locked);
+ block_manager->Update(dst_address, num_pages, KMemoryState::AliasCode, new_perm,
+ KMemoryAttribute::None);
+ }
return ResultSuccess;
}
-ResultCode KPageTable::UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size) {
+ResultCode KPageTable::UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size) {
+ // Validate the mapping request.
+ R_UNLESS(this->CanContain(dst_address, size, KMemoryState::AliasCode),
+ ResultInvalidMemoryRegion);
+
+ // Lock the table.
KScopedLightLock lk(general_lock);
- if (!size) {
- return ResultSuccess;
+ // Verify that the source memory is locked normal heap.
+ std::size_t num_src_allocator_blocks{};
+ R_TRY(this->CheckMemoryState(std::addressof(num_src_allocator_blocks), src_address, size,
+ KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
+ KMemoryPermission::None, KMemoryAttribute::All,
+ KMemoryAttribute::Locked));
+
+ // Verify that the destination memory is aliasable code.
+ std::size_t num_dst_allocator_blocks{};
+ R_TRY(this->CheckMemoryStateContiguous(
+ std::addressof(num_dst_allocator_blocks), dst_address, size, KMemoryState::FlagCanCodeAlias,
+ KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
+ KMemoryAttribute::All, KMemoryAttribute::None));
+
+ // Determine whether any pages being unmapped are code.
+ bool any_code_pages = false;
+ {
+ KMemoryBlockManager::const_iterator it = block_manager->FindIterator(dst_address);
+ while (true) {
+ // Get the memory info.
+ const KMemoryInfo info = it->GetMemoryInfo();
+
+ // Check if the memory has code flag.
+ if ((info.GetState() & KMemoryState::FlagCode) != KMemoryState::None) {
+ any_code_pages = true;
+ break;
+ }
+
+ // Check if we're done.
+ if (dst_address + size - 1 <= info.GetLastAddress()) {
+ break;
+ }
+
+ // Advance.
+ ++it;
+ }
}
- const std::size_t num_pages{size / PageSize};
+ // Ensure that we maintain the instruction cache.
+ bool reprotected_pages = false;
+ SCOPE_EXIT({
+ if (reprotected_pages && any_code_pages) {
+ system.InvalidateCpuInstructionCacheRange(dst_address, size);
+ }
+ });
- CASCADE_CODE(CheckMemoryState(nullptr, nullptr, nullptr, nullptr, src_addr, size,
- KMemoryState::All, KMemoryState::Normal, KMemoryPermission::None,
- KMemoryPermission::None, KMemoryAttribute::Mask,
- KMemoryAttribute::Locked, KMemoryAttribute::IpcAndDeviceMapped));
+ // Unmap.
+ {
+ // Determine the number of pages being operated on.
+ const std::size_t num_pages = size / PageSize;
- KMemoryState state{};
- CASCADE_CODE(CheckMemoryState(
- &state, nullptr, nullptr, nullptr, dst_addr, PageSize, KMemoryState::FlagCanCodeAlias,
- KMemoryState::FlagCanCodeAlias, KMemoryPermission::None, KMemoryPermission::None,
- KMemoryAttribute::Mask, KMemoryAttribute::None, KMemoryAttribute::IpcAndDeviceMapped));
- CASCADE_CODE(CheckMemoryState(dst_addr, size, KMemoryState::All, state, KMemoryPermission::None,
- KMemoryPermission::None, KMemoryAttribute::Mask,
- KMemoryAttribute::None));
- CASCADE_CODE(Operate(dst_addr, num_pages, KMemoryPermission::None, OperationType::Unmap));
+ // Unmap the aliased copy of the pages.
+ R_TRY(Operate(dst_address, num_pages, KMemoryPermission::None, OperationType::Unmap));
- block_manager->Update(dst_addr, num_pages, KMemoryState::Free);
- block_manager->Update(src_addr, num_pages, KMemoryState::Normal,
- KMemoryPermission::UserReadWrite);
+ // Try to set the permissions for the source pages back to what they should be.
+ R_TRY(Operate(src_address, num_pages, KMemoryPermission::UserReadWrite,
+ OperationType::ChangePermissions));
- system.InvalidateCpuInstructionCacheRange(dst_addr, size);
+ // Apply the memory block updates.
+ block_manager->Update(dst_address, num_pages, KMemoryState::None);
+ block_manager->Update(src_address, num_pages, KMemoryState::Normal,
+ KMemoryPermission::UserReadWrite);
+
+ // Note that we reprotected pages.
+ reprotected_pages = true;
+ }
return ResultSuccess;
}
diff --git a/src/core/hle/kernel/k_page_table.h b/src/core/hle/kernel/k_page_table.h
index 194177332..aea1b8f63 100644
--- a/src/core/hle/kernel/k_page_table.h
+++ b/src/core/hle/kernel/k_page_table.h
@@ -36,8 +36,8 @@ public:
KMemoryManager::Pool pool);
ResultCode MapProcessCode(VAddr addr, std::size_t pages_count, KMemoryState state,
KMemoryPermission perm);
- ResultCode MapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
- ResultCode UnmapCodeMemory(VAddr dst_addr, VAddr src_addr, std::size_t size);
+ ResultCode MapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
+ ResultCode UnmapCodeMemory(VAddr dst_address, VAddr src_address, std::size_t size);
ResultCode UnmapProcessMemory(VAddr dst_addr, std::size_t size, KPageTable& src_page_table,
VAddr src_addr);
ResultCode MapPhysicalMemory(VAddr addr, std::size_t size);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
index e0fe47912..f3c7ceb57 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -13,59 +13,535 @@ namespace {
// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
u64 ttbl) {
- IR::U32 r{ir.Imm32(0)};
- const IR::U32 not_a{ir.BitwiseNot(a)};
- const IR::U32 not_b{ir.BitwiseNot(b)};
- const IR::U32 not_c{ir.BitwiseNot(c)};
- if (ttbl & 0x01) {
- // r |= ~a & ~b & ~c;
- const auto lhs{ir.BitwiseAnd(not_a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
+ switch (ttbl) {
+ // generated code, do not edit manually
+ case 0:
+ return ir.Imm32(0);
+ case 1:
+ return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseOr(b, c)));
+ case 2:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 3:
+ return ir.BitwiseNot(ir.BitwiseOr(a, b));
+ case 4:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 5:
+ return ir.BitwiseNot(ir.BitwiseOr(a, c));
+ case 6:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+ case 7:
+ return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseAnd(b, c)));
+ case 8:
+ return ir.BitwiseAnd(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+ case 9:
+ return ir.BitwiseNot(ir.BitwiseOr(a, ir.BitwiseXor(b, c)));
+ case 10:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(a));
+ case 11:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 12:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(a));
+ case 13:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 14:
+ return ir.BitwiseAnd(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+ case 15:
+ return ir.BitwiseNot(a);
+ case 16:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 17:
+ return ir.BitwiseNot(ir.BitwiseOr(b, c));
+ case 18:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+ case 19:
+ return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseAnd(a, c)));
+ case 20:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+ case 21:
+ return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+ case 22:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+ case 23:
+ return ir.BitwiseXor(ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)),
+ ir.BitwiseNot(a));
+ case 24:
+ return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+ case 25:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c)));
+ case 26:
+ return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+ case 27:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+ case 28:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+ case 29:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+ case 30:
+ return ir.BitwiseXor(a, ir.BitwiseOr(b, c));
+ case 31:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)));
+ case 32:
+ return ir.BitwiseAnd(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+ case 33:
+ return ir.BitwiseNot(ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+ case 34:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(b));
+ case 35:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 36:
+ return ir.BitwiseAnd(ir.BitwiseXor(a, b), ir.BitwiseXor(b, c));
+ case 37:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c)));
+ case 38:
+ return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+ case 39:
+ return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 40:
+ return ir.BitwiseAnd(c, ir.BitwiseXor(a, b));
+ case 41:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b),
+ ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c)));
+ case 42:
+ return ir.BitwiseAnd(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+ case 43:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(c)),
+ ir.BitwiseOr(b, ir.BitwiseXor(a, c)));
+ case 44:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b));
+ case 45:
+ return ir.BitwiseXor(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 46:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(b, c));
+ case 47:
+ return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(b)), ir.BitwiseNot(a));
+ case 48:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(b));
+ case 49:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, ir.BitwiseNot(c)));
+ case 50:
+ return ir.BitwiseAnd(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+ case 51:
+ return ir.BitwiseNot(b);
+ case 52:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+ case 53:
+ return ir.BitwiseXor(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 54:
+ return ir.BitwiseXor(b, ir.BitwiseOr(a, c));
+ case 55:
+ return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)));
+ case 56:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b));
+ case 57:
+ return ir.BitwiseXor(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+ case 58:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseOr(a, c));
+ case 59:
+ return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseNot(b));
+ case 60:
+ return ir.BitwiseXor(a, b);
+ case 61:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, c)), ir.BitwiseXor(a, b));
+ case 62:
+ return ir.BitwiseOr(ir.BitwiseAnd(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, b));
+ case 63:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, b));
+ case 64:
+ return ir.BitwiseAnd(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+ case 65:
+ return ir.BitwiseNot(ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+ case 66:
+ return ir.BitwiseAnd(ir.BitwiseXor(a, c), ir.BitwiseXor(b, c));
+ case 67:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b)));
+ case 68:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(c));
+ case 69:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 70:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+ case 71:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 72:
+ return ir.BitwiseAnd(b, ir.BitwiseXor(a, c));
+ case 73:
+ return ir.BitwiseXor(ir.BitwiseOr(a, c),
+ ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b)));
+ case 74:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c));
+ case 75:
+ return ir.BitwiseXor(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 76:
+ return ir.BitwiseAnd(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+ case 77:
+ return ir.BitwiseXor(ir.BitwiseOr(a, ir.BitwiseNot(b)),
+ ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+ case 78:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(b, c));
+ case 79:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(c)), ir.BitwiseNot(a));
+ case 80:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(c));
+ case 81:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, ir.BitwiseNot(b)));
+ case 82:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+ case 83:
+ return ir.BitwiseXor(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 84:
+ return ir.BitwiseAnd(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+ case 85:
+ return ir.BitwiseNot(c);
+ case 86:
+ return ir.BitwiseXor(c, ir.BitwiseOr(a, b));
+ case 87:
+ return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)));
+ case 88:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c));
+ case 89:
+ return ir.BitwiseXor(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+ case 90:
+ return ir.BitwiseXor(a, c);
+ case 91:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(a, c));
+ case 92:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseOr(a, b));
+ case 93:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseNot(c));
+ case 94:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, c));
+ case 95:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, c));
+ case 96:
+ return ir.BitwiseAnd(a, ir.BitwiseXor(b, c));
+ case 97:
+ return ir.BitwiseXor(ir.BitwiseOr(b, c),
+ ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a)));
+ case 98:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c));
+ case 99:
+ return ir.BitwiseXor(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 100:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c));
+ case 101:
+ return ir.BitwiseXor(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 102:
+ return ir.BitwiseXor(b, c);
+ case 103:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)), ir.BitwiseXor(b, c));
+ case 104:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(c, ir.BitwiseAnd(a, b)));
+ case 105:
+ return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+ case 106:
+ return ir.BitwiseXor(c, ir.BitwiseAnd(a, b));
+ case 107:
+ return ir.BitwiseXor(ir.BitwiseAnd(c, ir.BitwiseOr(a, b)),
+ ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 108:
+ return ir.BitwiseXor(b, ir.BitwiseAnd(a, c));
+ case 109:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, ir.BitwiseOr(a, c)),
+ ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 110:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, c));
+ case 111:
+ return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseXor(b, c));
+ case 112:
+ return ir.BitwiseAnd(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+ case 113:
+ return ir.BitwiseXor(ir.BitwiseOr(b, ir.BitwiseNot(a)),
+ ir.BitwiseOr(c, ir.BitwiseXor(a, b)));
+ case 114:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, c));
+ case 115:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseNot(b));
+ case 116:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseOr(a, b));
+ case 117:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseNot(c));
+ case 118:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+ case 119:
+ return ir.BitwiseNot(ir.BitwiseAnd(b, c));
+ case 120:
+ return ir.BitwiseXor(a, ir.BitwiseAnd(b, c));
+ case 121:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, ir.BitwiseOr(b, c)),
+ ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 122:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, c));
+ case 123:
+ return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseXor(a, c));
+ case 124:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, b));
+ case 125:
+ return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseXor(a, b));
+ case 126:
+ return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c));
+ case 127:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseAnd(b, c)));
+ case 128:
+ return ir.BitwiseAnd(a, ir.BitwiseAnd(b, c));
+ case 129:
+ return ir.BitwiseNot(ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 130:
+ return ir.BitwiseAnd(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 131:
+ return ir.BitwiseAnd(ir.BitwiseOr(c, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 132:
+ return ir.BitwiseAnd(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 133:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 134:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 135:
+ return ir.BitwiseXor(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+ case 136:
+ return ir.BitwiseAnd(b, c);
+ case 137:
+ return ir.BitwiseAnd(ir.BitwiseOr(b, ir.BitwiseNot(a)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 138:
+ return ir.BitwiseAnd(c, ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 139:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 140:
+ return ir.BitwiseAnd(b, ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 141:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 142:
+ return ir.BitwiseXor(a, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 143:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseNot(a));
+ case 144:
+ return ir.BitwiseAnd(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 145:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 146:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 147:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+ case 148:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 149:
+ return ir.BitwiseXor(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+ case 150:
+ return ir.BitwiseXor(a, ir.BitwiseXor(b, c));
+ case 151:
+ return ir.BitwiseOr(ir.BitwiseNot(ir.BitwiseOr(a, b)),
+ ir.BitwiseXor(a, ir.BitwiseXor(b, c)));
+ case 152:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 153:
+ return ir.BitwiseXor(b, ir.BitwiseNot(c));
+ case 154:
+ return ir.BitwiseXor(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+ case 155:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(b, c)));
+ case 156:
+ return ir.BitwiseXor(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+ case 157:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(b, c)));
+ case 158:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseOr(b, c)));
+ case 159:
+ return ir.BitwiseNot(ir.BitwiseAnd(a, ir.BitwiseXor(b, c)));
+ case 160:
+ return ir.BitwiseAnd(a, c);
+ case 161:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 162:
+ return ir.BitwiseAnd(c, ir.BitwiseOr(a, ir.BitwiseNot(b)));
+ case 163:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 164:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 165:
+ return ir.BitwiseXor(a, ir.BitwiseNot(c));
+ case 166:
+ return ir.BitwiseXor(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+ case 167:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseXor(a, c)));
+ case 168:
+ return ir.BitwiseAnd(c, ir.BitwiseOr(a, b));
+ case 169:
+ return ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+ case 170:
+ return c;
+ case 171:
+ return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseOr(a, b)));
+ case 172:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(a)));
+ case 173:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 174:
+ return ir.BitwiseOr(c, ir.BitwiseAnd(b, ir.BitwiseNot(a)));
+ case 175:
+ return ir.BitwiseOr(c, ir.BitwiseNot(a));
+ case 176:
+ return ir.BitwiseAnd(a, ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 177:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 178:
+ return ir.BitwiseXor(b, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 179:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseNot(b));
+ case 180:
+ return ir.BitwiseXor(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+ case 181:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, c)));
+ case 182:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseOr(a, c)));
+ case 183:
+ return ir.BitwiseNot(ir.BitwiseAnd(b, ir.BitwiseXor(a, c)));
+ case 184:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseNot(b)));
+ case 185:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 186:
+ return ir.BitwiseOr(c, ir.BitwiseAnd(a, ir.BitwiseNot(b)));
+ case 187:
+ return ir.BitwiseOr(c, ir.BitwiseNot(b));
+ case 188:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, b));
+ case 189:
+ return ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 190:
+ return ir.BitwiseOr(c, ir.BitwiseXor(a, b));
+ case 191:
+ return ir.BitwiseOr(c, ir.BitwiseNot(ir.BitwiseAnd(a, b)));
+ case 192:
+ return ir.BitwiseAnd(a, b);
+ case 193:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 194:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 195:
+ return ir.BitwiseXor(a, ir.BitwiseNot(b));
+ case 196:
+ return ir.BitwiseAnd(b, ir.BitwiseOr(a, ir.BitwiseNot(c)));
+ case 197:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 198:
+ return ir.BitwiseXor(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+ case 199:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseXor(a, b)));
+ case 200:
+ return ir.BitwiseAnd(b, ir.BitwiseOr(a, c));
+ case 201:
+ return ir.BitwiseXor(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+ case 202:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(a)));
+ case 203:
+ return ir.BitwiseOr(ir.BitwiseAnd(b, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 204:
+ return b;
+ case 205:
+ return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseOr(a, c)));
+ case 206:
+ return ir.BitwiseOr(b, ir.BitwiseAnd(c, ir.BitwiseNot(a)));
+ case 207:
+ return ir.BitwiseOr(b, ir.BitwiseNot(a));
+ case 208:
+ return ir.BitwiseAnd(a, ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 209:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 210:
+ return ir.BitwiseXor(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
+ case 211:
+ return ir.BitwiseNot(ir.BitwiseAnd(ir.BitwiseOr(b, c), ir.BitwiseXor(a, b)));
+ case 212:
+ return ir.BitwiseXor(c, ir.BitwiseOr(ir.BitwiseXor(a, b), ir.BitwiseXor(a, c)));
+ case 213:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseNot(c));
+ case 214:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(c, ir.BitwiseOr(a, b)));
+ case 215:
+ return ir.BitwiseNot(ir.BitwiseAnd(c, ir.BitwiseXor(a, b)));
+ case 216:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, c), ir.BitwiseOr(b, ir.BitwiseNot(c)));
+ case 217:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 218:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, c));
+ case 219:
+ return ir.BitwiseOr(ir.BitwiseXor(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 220:
+ return ir.BitwiseOr(b, ir.BitwiseAnd(a, ir.BitwiseNot(c)));
+ case 221:
+ return ir.BitwiseOr(b, ir.BitwiseNot(c));
+ case 222:
+ return ir.BitwiseOr(b, ir.BitwiseXor(a, c));
+ case 223:
+ return ir.BitwiseOr(b, ir.BitwiseNot(ir.BitwiseAnd(a, c)));
+ case 224:
+ return ir.BitwiseAnd(a, ir.BitwiseOr(b, c));
+ case 225:
+ return ir.BitwiseXor(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+ case 226:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(b)), ir.BitwiseOr(b, c));
+ case 227:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, c), ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 228:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, ir.BitwiseNot(c)), ir.BitwiseOr(b, c));
+ case 229:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 230:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b), ir.BitwiseXor(b, c));
+ case 231:
+ return ir.BitwiseOr(ir.BitwiseXor(a, ir.BitwiseNot(b)), ir.BitwiseXor(b, c));
+ case 232:
+ return ir.BitwiseAnd(ir.BitwiseOr(a, b), ir.BitwiseOr(c, ir.BitwiseAnd(a, b)));
+ case 233:
+ return ir.BitwiseOr(ir.BitwiseAnd(a, b),
+ ir.BitwiseXor(ir.BitwiseNot(c), ir.BitwiseOr(a, b)));
+ case 234:
+ return ir.BitwiseOr(c, ir.BitwiseAnd(a, b));
+ case 235:
+ return ir.BitwiseOr(c, ir.BitwiseXor(a, ir.BitwiseNot(b)));
+ case 236:
+ return ir.BitwiseOr(b, ir.BitwiseAnd(a, c));
+ case 237:
+ return ir.BitwiseOr(b, ir.BitwiseXor(a, ir.BitwiseNot(c)));
+ case 238:
+ return ir.BitwiseOr(b, c);
+ case 239:
+ return ir.BitwiseOr(ir.BitwiseNot(a), ir.BitwiseOr(b, c));
+ case 240:
+ return a;
+ case 241:
+ return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseOr(b, c)));
+ case 242:
+ return ir.BitwiseOr(a, ir.BitwiseAnd(c, ir.BitwiseNot(b)));
+ case 243:
+ return ir.BitwiseOr(a, ir.BitwiseNot(b));
+ case 244:
+ return ir.BitwiseOr(a, ir.BitwiseAnd(b, ir.BitwiseNot(c)));
+ case 245:
+ return ir.BitwiseOr(a, ir.BitwiseNot(c));
+ case 246:
+ return ir.BitwiseOr(a, ir.BitwiseXor(b, c));
+ case 247:
+ return ir.BitwiseOr(a, ir.BitwiseNot(ir.BitwiseAnd(b, c)));
+ case 248:
+ return ir.BitwiseOr(a, ir.BitwiseAnd(b, c));
+ case 249:
+ return ir.BitwiseOr(a, ir.BitwiseXor(b, ir.BitwiseNot(c)));
+ case 250:
+ return ir.BitwiseOr(a, c);
+ case 251:
+ return ir.BitwiseOr(ir.BitwiseNot(b), ir.BitwiseOr(a, c));
+ case 252:
+ return ir.BitwiseOr(a, b);
+ case 253:
+ return ir.BitwiseOr(ir.BitwiseNot(c), ir.BitwiseOr(a, b));
+ case 254:
+ return ir.BitwiseOr(a, ir.BitwiseOr(b, c));
+ case 255:
+ return ir.Imm32(0xFFFFFFFF);
+ // end of generated code
}
- if (ttbl & 0x02) {
- // r |= ~a & ~b & c;
- const auto lhs{ir.BitwiseAnd(not_a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x04) {
- // r |= ~a & b & ~c;
- const auto lhs{ir.BitwiseAnd(not_a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x08) {
- // r |= ~a & b & c;
- const auto lhs{ir.BitwiseAnd(not_a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x10) {
- // r |= a & ~b & ~c;
- const auto lhs{ir.BitwiseAnd(a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x20) {
- // r |= a & ~b & c;
- const auto lhs{ir.BitwiseAnd(a, not_b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x40) {
- // r |= a & b & ~c;
- const auto lhs{ir.BitwiseAnd(a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, not_c)};
- r = ir.BitwiseOr(r, rhs);
- }
- if (ttbl & 0x80) {
- // r |= a & b & c;
- const auto lhs{ir.BitwiseAnd(a, b)};
- const auto rhs{ir.BitwiseAnd(lhs, c)};
- r = ir.BitwiseOr(r, rhs);
- }
- return r;
+ throw NotImplementedException("LOP3 with out of range ttbl");
}
IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
new file mode 100644
index 000000000..8f547c266
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
@@ -0,0 +1,92 @@
+# Copyright © 2022 degasus <markus@selfnet.de>
+# This work is free. You can redistribute it and/or modify it under the
+# terms of the Do What The Fuck You Want To Public License, Version 2,
+# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+
+from itertools import product
+
+# The primitive instructions
+OPS = {
+ 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b),
+ 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b),
+ 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b),
+ 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often inlined in other instructions
+}
+
+# Our database of combination of instructions
+optimized_calls = {}
+def cmp(lhs, rhs):
+ if lhs is None: # new entry
+ return True
+ if lhs[3] > rhs[3]: # costs
+ return True
+ if lhs[3] < rhs[3]: # costs
+ return False
+ if len(lhs[0]) > len(rhs[0]): # string len
+ return True
+ if len(lhs[0]) < len(rhs[0]): # string len
+ return False
+ if lhs[0] > rhs[0]: # string sorting
+ return True
+ if lhs[0] < rhs[0]: # string sorting
+ return False
+ assert lhs == rhs, "redundant instruction, bug in brute force"
+ return False
+def register(imm, instruction, count, latency):
+ # Use the sum of instruction count and latency as costs to evaluate which combination is best
+ costs = count + latency
+
+ old = optimized_calls.get(imm, None)
+ new = (instruction, count, latency, costs)
+
+ # Update if new or better
+ if cmp(old, new):
+ optimized_calls[imm] = new
+ return True
+
+ return False
+
+# Constants: 0, 1 (for free)
+register(0, 'ir.Imm32(0)', 0, 0)
+register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)
+
+# Inputs: a, b, c (for free)
+ta = 0xF0
+tb = 0xCC
+tc = 0xAA
+inputs = {
+ ta : 'a',
+ tb : 'b',
+ tc : 'c',
+}
+for imm, instruction in inputs.items():
+ register(imm, instruction, 0, 0)
+ register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs
+
+# Try to combine two values from the db with an instruction.
+# If it is better than the old method, update it.
+while True:
+ registered = 0
+ calls_copy = optimized_calls.copy()
+ for OP, (argc, cost, f) in OPS.items():
+ for args in product(calls_copy.items(), repeat=argc):
+ # unpack(transponse) the arrays
+ imm = [arg[0] for arg in args]
+ value = [arg[1][0] for arg in args]
+ count = [arg[1][1] for arg in args]
+ latency = [arg[1][2] for arg in args]
+
+ registered += register(
+ f(*imm),
+ OP.format(*value),
+ sum(count) + cost,
+ max(latency) + cost)
+ if registered == 0:
+ # No update at all? So terminate
+ break
+
+# Hacky output. Please improve me to output valid C++ instead.
+s = """ case {imm}:
+ return {op};"""
+for imm in range(256):
+ print(s.format(imm=imm, op=optimized_calls[imm][0]))
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 67388d980..1fc1358bc 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -53,7 +53,6 @@ void MaxwellDMA::Launch() {
// TODO(Subv): Perform more research and implement all features of this engine.
const LaunchDMA& launch = regs.launch_dma;
- ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE);
ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
ASSERT(regs.dst_params.origin.x == 0);
@@ -79,6 +78,7 @@ void MaxwellDMA::Launch() {
CopyPitchToBlockLinear();
}
}
+ ReleaseSemaphore();
}
void MaxwellDMA::CopyPitchToPitch() {
@@ -244,4 +244,22 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size);
}
+void MaxwellDMA::ReleaseSemaphore() {
+ const auto type = regs.launch_dma.semaphore_type;
+ const GPUVAddr address = regs.semaphore.address;
+ switch (type) {
+ case LaunchDMA::SemaphoreType::NONE:
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE:
+ memory_manager.Write<u32>(address, regs.semaphore.payload);
+ break;
+ case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE:
+ memory_manager.Write<u64>(address, static_cast<u64>(regs.semaphore.payload));
+ memory_manager.Write<u64>(address + 8, system.GPU().GetTicks());
+ break;
+ default:
+ UNREACHABLE_MSG("Unknown semaphore type: {}", static_cast<u32>(type.Value()));
+ }
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index a04514425..2692cac8a 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -224,6 +224,8 @@ private:
void FastCopyBlockLinearToPitch();
+ void ReleaseSemaphore();
+
Core::System& system;
MemoryManager& memory_manager;