diff options
Diffstat (limited to '')
23 files changed, 1107 insertions, 486 deletions
diff --git a/.appveyor/UtilityFunctions.ps1 b/.appveyor/UtilityFunctions.ps1 new file mode 100644 index 000000000..fd7476314 --- /dev/null +++ b/.appveyor/UtilityFunctions.ps1 @@ -0,0 +1,39 @@ +# Set-up Visual Studio Command Prompt environment for PowerShell +pushd "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\" +cmd /c "VsDevCmd.bat -arch=x64 & set" | foreach { + if ($_ -match "=") { + $v = $_.split("="); Set-Item -Force -Path "ENV:\$($v[0])" -Value "$($v[1])" + } +} +popd + +function Which ($search_path, $name) { + ($search_path).Split(";") | Get-ChildItem -Filter $name | Select -First 1 -Exp FullName +} + +function GetDeps ($search_path, $binary) { + ((dumpbin /dependents $binary).Where({ $_ -match "dependencies:"}, "SkipUntil") | Select-String "[^ ]*\.dll").Matches | foreach { + Which $search_path $_.Value + } +} + +function RecursivelyGetDeps ($search_path, $binary) { + $final_deps = @() + $deps_to_process = GetDeps $search_path $binary + while ($deps_to_process.Count -gt 0) { + $current, $deps_to_process = $deps_to_process + if ($final_deps -contains $current) { continue } + + # Is this a system dll file? + # We use the same algorithm that cmake uses to determine this. + if ($current -match "$([regex]::Escape($env:SystemRoot))\\sys") { continue } + if ($current -match "$([regex]::Escape($env:WinDir))\\sys") { continue } + if ($current -match "\\msvc[^\\]+dll") { continue } + if ($current -match "\\api-ms-win-[^\\]+dll") { continue } + + $final_deps += $current + $new_deps = GetDeps $search_path $current + $deps_to_process += ($new_deps | ?{-not ($final_deps -contains $_)}) + } + return $final_deps +} diff --git a/appveyor.yml b/appveyor.yml index fe6b649f5..72cda26a7 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -121,23 +121,16 @@ after_build: Get-ChildItem "$CMAKE_BINARY_DIR" -Filter "yuzu*.exe" | Copy-Item -destination $RELEASE_DIST Copy-Item -path "$CMAKE_SOURCE_DIR/license.txt" -destination $RELEASE_DIST Copy-Item -path "$CMAKE_SOURCE_DIR/README.md" -destination $RELEASE_DIST + # copy all the dll dependencies to the release folder - # hardcoded list because we don't build static and determining the list of dlls from the binary is a pain. - $MingwDLLs = "Qt5Core.dll","Qt5Widgets.dll","Qt5Gui.dll","Qt5OpenGL.dll", - # QT dll dependencies - "libbz2-*.dll","libicudt*.dll","libicuin*.dll","libicuuc*.dll","libffi-*.dll", - "libfreetype-*.dll","libglib-*.dll","libgobject-*.dll","libgraphite2.dll","libiconv-*.dll", - "libharfbuzz-*.dll","libintl-*.dll","libpcre-*.dll","libpcre2-16-*.dll","libpcre16-*.dll","libpng16-*.dll", - # Runtime/Other dependencies - "libgcc_s_seh-*.dll","libstdc++-*.dll","libwinpthread-*.dll","SDL2.dll","zlib1.dll" + . "./.appveyor/UtilityFunctions.ps1" + $DLLSearchPath = "C:\msys64\mingw64\bin;$env:PATH" + $MingwDLLs = RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu.exe" + $MingwDLLs += RecursivelyGetDeps $DLLSearchPath "$RELEASE_DIST\yuzu_cmd.exe" + Write-Host "Detected the following dependencies:" + Write-Host $MingwDLLs foreach ($file in $MingwDLLs) { - Copy-Item -path "C:/msys64/mingw64/bin/$file" -force -destination "$RELEASE_DIST" - } - # the above list copies a few extra debug dlls that aren't needed (thanks globbing patterns!) - # so we can remove them by hardcoding another list of extra dlls to remove - $DebugDLLs = "libicudtd*.dll","libicuind*.dll","libicuucd*.dll" - foreach ($file in $DebugDLLs) { - Remove-Item -path "$RELEASE_DIST/$file" + Copy-Item -path "$file" -force -destination "$RELEASE_DIST" } # copy the qt windows plugin dll to platforms diff --git a/src/core/memory.cpp b/src/core/memory.cpp index d6469dd3d..291bf066f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -15,6 +15,7 @@ #include "core/core.h" #include "core/hle/kernel/memory.h" #include "core/hle/kernel/process.h" +#include "core/hle/lock.h" #include "core/memory.h" #include "core/memory_setup.h" #include "video_core/renderer_base.h" @@ -115,91 +116,120 @@ static std::set<MemoryHookPointer> GetSpecialHandlers(VAddr vaddr, u64 size) { return GetSpecialHandlers(page_table, vaddr, size); } -template <typename T> -boost::optional<T> ReadSpecial(VAddr addr); +/** + * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) + * using a VMA from the current process + */ +static u8* GetPointerFromVMA(const Kernel::Process& process, VAddr vaddr) { + u8* direct_pointer = nullptr; + + auto& vm_manager = process.vm_manager; + + auto it = vm_manager.FindVMA(vaddr); + ASSERT(it != vm_manager.vma_map.end()); + + auto& vma = it->second; + switch (vma.type) { + case Kernel::VMAType::AllocatedMemoryBlock: + direct_pointer = vma.backing_block->data() + vma.offset; + break; + case Kernel::VMAType::BackingMemory: + direct_pointer = vma.backing_memory; + break; + case Kernel::VMAType::Free: + return nullptr; + default: + UNREACHABLE(); + } + + return direct_pointer + (vaddr - vma.base); +} + +/** + * Gets a pointer to the exact memory at the virtual address (i.e. not page aligned) + * using a VMA from the current process. + */ +static u8* GetPointerFromVMA(VAddr vaddr) { + return GetPointerFromVMA(*Core::CurrentProcess(), vaddr); +} template <typename T> T Read(const VAddr vaddr) { - if ((vaddr >> PAGE_BITS) >= PAGE_TABLE_NUM_ENTRIES) { - LOG_ERROR(HW_Memory, "Read%lu after page table @ 0x%016" PRIX64, sizeof(T) * 8, vaddr); - return 0; + const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer) { + // NOTE: Avoid adding any extra logic to this fast-path block + T value; + std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); + return value; } - const PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); + + PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (type) { case PageType::Unmapped: - LOG_ERROR(HW_Memory, "unmapped Read%zu @ 0x%016" PRIX64, sizeof(T) * 8, vaddr); + LOG_ERROR(HW_Memory, "unmapped Read%lu @ 0x%08X", sizeof(T) * 8, vaddr); return 0; - case PageType::Special: { - if (auto result = ReadSpecial<T>(vaddr)) - return *result; - [[fallthrough]]; - } - case PageType::Memory: { - const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - ASSERT_MSG(page_pointer, "Mapped memory page without a pointer @ %016" PRIX64, vaddr); + case PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); + break; + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); T value; - std::memcpy(&value, &page_pointer[vaddr & PAGE_MASK], sizeof(T)); + std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); return value; } + default: + UNREACHABLE(); } - UNREACHABLE(); - return 0; } template <typename T> -bool WriteSpecial(VAddr addr, const T data); - -template <typename T> void Write(const VAddr vaddr, const T data) { - if ((vaddr >> PAGE_BITS) >= PAGE_TABLE_NUM_ENTRIES) { - LOG_ERROR(HW_Memory, "Write%lu after page table 0x%08X @ 0x%016" PRIX64, sizeof(data) * 8, - (u32)data, vaddr); + u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; + if (page_pointer) { + // NOTE: Avoid adding any extra logic to this fast-path block + std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); return; } - const PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; + // The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state + std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock); + + PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (type) { case PageType::Unmapped: - LOG_ERROR(HW_Memory, "unmapped Write%zu 0x%08X @ 0x%016" PRIX64, sizeof(data) * 8, - static_cast<u32>(data), vaddr); - return; - case PageType::Special: { - if (WriteSpecial<T>(vaddr, data)) - return; - [[fallthrough]]; - } - case PageType::Memory: { - u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; - ASSERT_MSG(page_pointer, "Mapped memory page without a pointer @ %016" PRIX64, vaddr); - std::memcpy(&page_pointer[vaddr & PAGE_MASK], &data, sizeof(T)); + LOG_ERROR(HW_Memory, "unmapped Write%lu 0x%08X @ 0x%08X", sizeof(data) * 8, (u32)data, + vaddr); return; + case PageType::Memory: + ASSERT_MSG(false, "Mapped memory page without a pointer @ %08X", vaddr); + break; + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); + std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); + break; } + default: + UNREACHABLE(); } - UNREACHABLE(); } bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) { auto& page_table = process.vm_manager.page_table; - if ((vaddr >> PAGE_BITS) >= PAGE_TABLE_NUM_ENTRIES) - return false; + const u8* page_pointer = page_table.pointers[vaddr >> PAGE_BITS]; + if (page_pointer) + return true; - const PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; - switch (type) { - case PageType::Unmapped: - return false; - case PageType::Memory: + if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) return true; - case PageType::Special: { - for (auto handler : GetSpecialHandlers(page_table, vaddr, 1)) - if (auto result = handler->IsValidAddress(vaddr)) - return *result; - return current_page_table->pointers[vaddr >> PAGE_BITS] != nullptr; - } - } - UNREACHABLE(); + + if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) + return false; + return false; } @@ -217,7 +247,11 @@ u8* GetPointer(const VAddr vaddr) { return page_pointer + (vaddr & PAGE_MASK); } - LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%016" PRIx64, vaddr); + if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) { + return GetPointerFromVMA(vaddr); + } + + LOG_ERROR(HW_Memory, "unknown GetPointer @ 0x%08x", vaddr); return nullptr; } @@ -291,6 +325,58 @@ u8* GetPhysicalPointer(PAddr address) { return target_pointer; } +void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) { + if (start == 0) { + return; + } + + u64 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1; + VAddr vaddr = start; + + for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { + PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; + + if (cached) { + // Switch page type to cached if now cached + switch (page_type) { + case PageType::Unmapped: + // It is not necessary for a process to have this region mapped into its address + // space, for example, a system module need not have a VRAM mapping. + break; + case PageType::Memory: + page_type = PageType::RasterizerCachedMemory; + current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; + break; + default: + UNREACHABLE(); + } + } else { + // Switch page type to uncached if now uncached + switch (page_type) { + case PageType::Unmapped: + // It is not necessary for a process to have this region mapped into its address + // space, for example, a system module need not have a VRAM mapping. + break; + case PageType::RasterizerCachedMemory: { + u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); + if (pointer == nullptr) { + // It's possible that this function has been called while updating the pagetable + // after unmapping a VMA. In that case the underlying VMA will no longer exist, + // and we should just leave the pagetable entry blank. + page_type = PageType::Unmapped; + } else { + page_type = PageType::Memory; + current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; + } + break; + } + default: + UNREACHABLE(); + } + } + } +} + void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be // null here @@ -344,17 +430,6 @@ u64 Read64(const VAddr addr) { return Read<u64_le>(addr); } -static bool ReadSpecialBlock(const Kernel::Process& process, const VAddr src_addr, - void* dest_buffer, const size_t size) { - auto& page_table = process.vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, src_addr, size)) { - if (handler->ReadBlock(src_addr, dest_buffer, size)) { - return true; - } - } - return false; -} - void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer, const size_t size) { auto& page_table = process.vm_manager.page_table; @@ -364,21 +439,16 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_ size_t page_offset = src_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min<size_t>(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, - "unmapped ReadBlock @ 0x%016" PRIX64 " (start address = 0x%" PRIx64 - ", size = %zu)", + case PageType::Unmapped: { + LOG_ERROR(HW_Memory, "unmapped ReadBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, src_addr, size); std::memset(dest_buffer, 0, copy_amount); break; - case PageType::Special: { - if (ReadSpecialBlock(process, current_vaddr, dest_buffer, copy_amount)) - break; - [[fallthrough]]; } case PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); @@ -387,6 +457,12 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_ std::memcpy(dest_buffer, src_ptr, copy_amount); break; } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), + FlushMode::Flush); + std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); + break; + } default: UNREACHABLE(); } @@ -418,17 +494,6 @@ void Write64(const VAddr addr, const u64 data) { Write<u64_le>(addr, data); } -static bool WriteSpecialBlock(const Kernel::Process& process, const VAddr dest_addr, - const void* src_buffer, const size_t size) { - auto& page_table = process.vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, dest_addr, size)) { - if (handler->WriteBlock(dest_addr, src_buffer, size)) { - return true; - } - } - return false; -} - void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer, const size_t size) { auto& page_table = process.vm_manager.page_table; @@ -437,20 +502,17 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi size_t page_offset = dest_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min<size_t>(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); switch (page_table.attributes[page_index]) { - case PageType::Unmapped: + case PageType::Unmapped: { LOG_ERROR(HW_Memory, - "unmapped WriteBlock @ 0x%016" PRIX64 " (start address = 0x%016" PRIX64 - ", size = %zu)", + "unmapped WriteBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, dest_addr, size); break; - case PageType::Special: - if (WriteSpecialBlock(process, current_vaddr, src_buffer, copy_amount)) - break; - [[fallthrough]]; + } case PageType::Memory: { DEBUG_ASSERT(page_table.pointers[page_index]); @@ -458,6 +520,12 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi std::memcpy(dest_ptr, src_buffer, copy_amount); break; } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), + FlushMode::Invalidate); + std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); + break; + } default: UNREACHABLE(); } @@ -473,9 +541,8 @@ void WriteBlock(const VAddr dest_addr, const void* src_buffer, const size_t size WriteBlock(*Core::CurrentProcess(), dest_addr, src_buffer, size); } -void ZeroBlock(const VAddr dest_addr, const size_t size) { - const auto& process = *Core::CurrentProcess(); - +void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const size_t size) { + auto& page_table = process.vm_manager.page_table; size_t remaining_size = size; size_t page_index = dest_addr >> PAGE_BITS; size_t page_offset = dest_addr & PAGE_MASK; @@ -483,27 +550,29 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) { static const std::array<u8, PAGE_SIZE> zeros = {}; while (remaining_size > 0) { - const size_t copy_amount = std::min<size_t>(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); - switch (current_page_table->attributes[page_index]) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, - "unmapped ZeroBlock @ 0x%016" PRIX64 " (start address = 0x%016" PRIX64 - ", size = %zu)", + switch (page_table.attributes[page_index]) { + case PageType::Unmapped: { + LOG_ERROR(HW_Memory, "unmapped ZeroBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, dest_addr, size); break; - case PageType::Special: - if (WriteSpecialBlock(process, current_vaddr, zeros.data(), copy_amount)) - break; - [[fallthrough]]; + } case PageType::Memory: { - DEBUG_ASSERT(current_page_table->pointers[page_index]); + DEBUG_ASSERT(page_table.pointers[page_index]); - u8* dest_ptr = current_page_table->pointers[page_index] + page_offset; + u8* dest_ptr = page_table.pointers[page_index] + page_offset; std::memset(dest_ptr, 0, copy_amount); break; } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), + FlushMode::Invalidate); + std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); + break; + } default: UNREACHABLE(); } @@ -514,37 +583,34 @@ void ZeroBlock(const VAddr dest_addr, const size_t size) { } } -void CopyBlock(VAddr dest_addr, VAddr src_addr, const size_t size) { - const auto& process = *Core::CurrentProcess(); - +void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, const size_t size) { + auto& page_table = process.vm_manager.page_table; size_t remaining_size = size; size_t page_index = src_addr >> PAGE_BITS; size_t page_offset = src_addr & PAGE_MASK; while (remaining_size > 0) { - const size_t copy_amount = std::min<size_t>(PAGE_SIZE - page_offset, remaining_size); + const size_t copy_amount = + std::min(static_cast<size_t>(PAGE_SIZE) - page_offset, remaining_size); const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); - switch (current_page_table->attributes[page_index]) { - case PageType::Unmapped: - LOG_ERROR(HW_Memory, - "unmapped CopyBlock @ 0x%016" PRIX64 " (start address = 0x%016" PRIX64 - ", size = %zu)", + switch (page_table.attributes[page_index]) { + case PageType::Unmapped: { + LOG_ERROR(HW_Memory, "unmapped CopyBlock @ 0x%08X (start address = 0x%08X, size = %zu)", current_vaddr, src_addr, size); - ZeroBlock(dest_addr, copy_amount); + ZeroBlock(process, dest_addr, copy_amount); break; - case PageType::Special: { - std::vector<u8> buffer(copy_amount); - if (ReadSpecialBlock(process, current_vaddr, buffer.data(), buffer.size())) { - WriteBlock(dest_addr, buffer.data(), buffer.size()); - break; - } - [[fallthrough]]; } case PageType::Memory: { - DEBUG_ASSERT(current_page_table->pointers[page_index]); - const u8* src_ptr = current_page_table->pointers[page_index] + page_offset; - WriteBlock(dest_addr, src_ptr, copy_amount); + DEBUG_ASSERT(page_table.pointers[page_index]); + const u8* src_ptr = page_table.pointers[page_index] + page_offset; + WriteBlock(process, dest_addr, src_ptr, copy_amount); + break; + } + case PageType::RasterizerCachedMemory: { + RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), + FlushMode::Flush); + WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); break; } default: @@ -559,78 +625,6 @@ void CopyBlock(VAddr dest_addr, VAddr src_addr, const size_t size) { } } -template <> -boost::optional<u8> ReadSpecial<u8>(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u8))) - if (auto result = handler->Read8(addr)) - return *result; - return {}; -} - -template <> -boost::optional<u16> ReadSpecial<u16>(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u16))) - if (auto result = handler->Read16(addr)) - return *result; - return {}; -} - -template <> -boost::optional<u32> ReadSpecial<u32>(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u32))) - if (auto result = handler->Read32(addr)) - return *result; - return {}; -} - -template <> -boost::optional<u64> ReadSpecial<u64>(VAddr addr) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u64))) - if (auto result = handler->Read64(addr)) - return *result; - return {}; -} - -template <> -bool WriteSpecial<u8>(VAddr addr, const u8 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u8))) - if (handler->Write8(addr, data)) - return true; - return false; -} - -template <> -bool WriteSpecial<u16>(VAddr addr, const u16 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u16))) - if (handler->Write16(addr, data)) - return true; - return false; -} - -template <> -bool WriteSpecial<u32>(VAddr addr, const u32 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u32))) - if (handler->Write32(addr, data)) - return true; - return false; -} - -template <> -bool WriteSpecial<u64>(VAddr addr, const u64 data) { - const PageTable& page_table = Core::CurrentProcess()->vm_manager.page_table; - for (const auto& handler : GetSpecialHandlers(page_table, addr, sizeof(u64))) - if (handler->Write64(addr, data)) - return true; - return false; -} - boost::optional<PAddr> TryVirtualToPhysicalAddress(const VAddr addr) { if (addr == 0) { return 0; diff --git a/src/core/memory.h b/src/core/memory.h index 4b9c482fe..413a7b4e8 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -255,6 +255,11 @@ enum class FlushMode { }; /** + * Mark each page touching the region as cached. + */ +void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached); + +/** * Flushes and invalidates any externally cached rasterizer resources touching the given virtual * address region. */ diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3dab81769..841f27d7f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -31,6 +31,7 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/maxwell_to_gl.h renderer_opengl/renderer_opengl.cpp renderer_opengl/renderer_opengl.h textures/decoders.cpp diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 986165c6d..5359d21a2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,8 +7,11 @@ #include "core/core.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" #include "video_core/textures/decoders.h" #include "video_core/textures/texture.h" +#include "video_core/video_core.h" namespace Tegra { namespace Engines { @@ -174,7 +177,9 @@ void Maxwell3D::ProcessQueryGet() { } void Maxwell3D::DrawArrays() { - LOG_WARNING(HW_GPU, "Game requested a DrawArrays, ignoring"); + LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(), + regs.vertex_buffer.count); + auto debug_context = Core::System::GetInstance().GetGPUDebugContext(); if (debug_context) { @@ -184,6 +189,8 @@ void Maxwell3D::DrawArrays() { if (debug_context) { debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); } + + VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/); } void Maxwell3D::BindTextureInfoBuffer(const std::vector<u32>& parameters) { @@ -294,8 +301,45 @@ void Maxwell3D::ProcessCBData(u32 value) { regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4; } -std::vector<Texture::TICEntry> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) { - std::vector<Texture::TICEntry> textures; +Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { + GPUVAddr tic_base_address = regs.tic.TICAddress(); + + GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry); + VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu); + + Texture::TICEntry tic_entry; + Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); + + ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear, + "TIC versions other than BlockLinear are unimplemented"); + + ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D, + "Texture types other than Texture2D are unimplemented"); + + auto r_type = tic_entry.r_type.Value(); + auto g_type = tic_entry.g_type.Value(); + auto b_type = tic_entry.b_type.Value(); + auto a_type = tic_entry.a_type.Value(); + + // TODO(Subv): Different data types for separate components are not supported + ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); + + return tic_entry; +} + +Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { + GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); + + GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry); + VAddr tsc_address_cpu = memory_manager.PhysicalToVirtualAddress(tsc_address_gpu); + + Texture::TSCEntry tsc_entry; + Memory::ReadBlock(tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); + return tsc_entry; +} + +std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const { + std::vector<Texture::FullTextureInfo> textures; auto& fragment_shader = state.shader_stages[static_cast<size_t>(stage)]; auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index]; @@ -309,31 +353,34 @@ std::vector<Texture::TICEntry> Maxwell3D::GetStageTextures(Regs::ShaderStage sta static constexpr size_t TextureInfoOffset = 0x20; for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; - current_texture < tex_info_buffer_end; current_texture += 4) { + current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { - Texture::TextureHandle tex_info{ + Texture::TextureHandle tex_handle{ Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))}; - if (tex_info.tic_id != 0 || tex_info.tsc_id != 0) { - GPUVAddr tic_address_gpu = - tic_base_address + tex_info.tic_id * sizeof(Texture::TICEntry); - VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu); - - Texture::TICEntry tic_entry; - Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); - - auto r_type = tic_entry.r_type.Value(); - auto g_type = tic_entry.g_type.Value(); - auto b_type = tic_entry.b_type.Value(); - auto a_type = tic_entry.a_type.Value(); + Texture::FullTextureInfo tex_info{}; + // TODO(Subv): Use the shader to determine which textures are actually accessed. + tex_info.index = (current_texture - tex_info_buffer.address - TextureInfoOffset) / + sizeof(Texture::TextureHandle); - // TODO(Subv): Different data types for separate components are not supported - ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); + // Load the TIC data. + if (tex_handle.tic_id != 0) { + tex_info.enabled = true; - auto format = tic_entry.format.Value(); + auto tic_entry = GetTICEntry(tex_handle.tic_id); + // TODO(Subv): Workaround for BitField's move constructor being deleted. + std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry)); + } - textures.push_back(tic_entry); + // Load the TSC data + if (tex_handle.tsc_id != 0) { + auto tsc_entry = GetTSCEntry(tex_handle.tsc_id); + // TODO(Subv): Workaround for BitField's move constructor being deleted. + std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry)); } + + if (tex_info.enabled) + textures.push_back(tex_info); } return textures; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 441cc0c19..3066bc606 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -11,6 +11,8 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/textures/texture.h" @@ -59,88 +61,173 @@ public: Fragment = 4, }; - enum class VertexSize : u32 { - Size_32_32_32_32 = 0x01, - Size_32_32_32 = 0x02, - Size_16_16_16_16 = 0x03, - Size_32_32 = 0x04, - Size_16_16_16 = 0x05, - Size_8_8_8_8 = 0x0a, - Size_16_16 = 0x0f, - Size_32 = 0x12, - Size_8_8_8 = 0x13, - Size_8_8 = 0x18, - Size_16 = 0x1b, - Size_8 = 0x1d, - Size_10_10_10_2 = 0x30, - Size_11_11_10 = 0x31, - }; + struct VertexAttribute { + enum class Size : u32 { + Size_32_32_32_32 = 0x01, + Size_32_32_32 = 0x02, + Size_16_16_16_16 = 0x03, + Size_32_32 = 0x04, + Size_16_16_16 = 0x05, + Size_8_8_8_8 = 0x0a, + Size_16_16 = 0x0f, + Size_32 = 0x12, + Size_8_8_8 = 0x13, + Size_8_8 = 0x18, + Size_16 = 0x1b, + Size_8 = 0x1d, + Size_10_10_10_2 = 0x30, + Size_11_11_10 = 0x31, + }; + + enum class Type : u32 { + SignedNorm = 1, + UnsignedNorm = 2, + SignedInt = 3, + UnsignedInt = 4, + UnsignedScaled = 5, + SignedScaled = 6, + Float = 7, + }; + + union { + BitField<0, 5, u32> buffer; + BitField<6, 1, u32> constant; + BitField<7, 14, u32> offset; + BitField<21, 6, Size> size; + BitField<27, 3, Type> type; + BitField<31, 1, u32> bgra; + }; - static std::string VertexSizeToString(VertexSize vertex_size) { - switch (vertex_size) { - case VertexSize::Size_32_32_32_32: - return "32_32_32_32"; - case VertexSize::Size_32_32_32: - return "32_32_32"; - case VertexSize::Size_16_16_16_16: - return "16_16_16_16"; - case VertexSize::Size_32_32: - return "32_32"; - case VertexSize::Size_16_16_16: - return "16_16_16"; - case VertexSize::Size_8_8_8_8: - return "8_8_8_8"; - case VertexSize::Size_16_16: - return "16_16"; - case VertexSize::Size_32: - return "32"; - case VertexSize::Size_8_8_8: - return "8_8_8"; - case VertexSize::Size_8_8: - return "8_8"; - case VertexSize::Size_16: - return "16"; - case VertexSize::Size_8: - return "8"; - case VertexSize::Size_10_10_10_2: - return "10_10_10_2"; - case VertexSize::Size_11_11_10: - return "11_11_10"; + u32 ComponentCount() const { + switch (size) { + case Size::Size_32_32_32_32: + return 4; + case Size::Size_32_32_32: + return 3; + case Size::Size_16_16_16_16: + return 4; + case Size::Size_32_32: + return 2; + case Size::Size_16_16_16: + return 3; + case Size::Size_8_8_8_8: + return 4; + case Size::Size_16_16: + return 2; + case Size::Size_32: + return 1; + case Size::Size_8_8_8: + return 3; + case Size::Size_8_8: + return 2; + case Size::Size_16: + return 1; + case Size::Size_8: + return 1; + case Size::Size_10_10_10_2: + return 4; + case Size::Size_11_11_10: + return 3; + default: + UNREACHABLE(); + } } - UNIMPLEMENTED(); - return {}; - } - - enum class VertexType : u32 { - SignedNorm = 1, - UnsignedNorm = 2, - SignedInt = 3, - UnsignedInt = 4, - UnsignedScaled = 5, - SignedScaled = 6, - Float = 7, - }; - static std::string VertexTypeToString(VertexType vertex_type) { - switch (vertex_type) { - case VertexType::SignedNorm: - return "SignedNorm"; - case VertexType::UnsignedNorm: - return "UnsignedNorm"; - case VertexType::SignedInt: - return "SignedInt"; - case VertexType::UnsignedInt: - return "UnsignedInt"; - case VertexType::UnsignedScaled: - return "UnsignedScaled"; - case VertexType::SignedScaled: - return "SignedScaled"; - case VertexType::Float: - return "Float"; + u32 SizeInBytes() const { + switch (size) { + case Size::Size_32_32_32_32: + return 16; + case Size::Size_32_32_32: + return 12; + case Size::Size_16_16_16_16: + return 8; + case Size::Size_32_32: + return 8; + case Size::Size_16_16_16: + return 6; + case Size::Size_8_8_8_8: + return 4; + case Size::Size_16_16: + return 4; + case Size::Size_32: + return 4; + case Size::Size_8_8_8: + return 3; + case Size::Size_8_8: + return 2; + case Size::Size_16: + return 2; + case Size::Size_8: + return 1; + case Size::Size_10_10_10_2: + return 4; + case Size::Size_11_11_10: + return 4; + default: + UNREACHABLE(); + } } - UNIMPLEMENTED(); - return {}; - } + + std::string SizeString() const { + switch (size) { + case Size::Size_32_32_32_32: + return "32_32_32_32"; + case Size::Size_32_32_32: + return "32_32_32"; + case Size::Size_16_16_16_16: + return "16_16_16_16"; + case Size::Size_32_32: + return "32_32"; + case Size::Size_16_16_16: + return "16_16_16"; + case Size::Size_8_8_8_8: + return "8_8_8_8"; + case Size::Size_16_16: + return "16_16"; + case Size::Size_32: + return "32"; + case Size::Size_8_8_8: + return "8_8_8"; + case Size::Size_8_8: + return "8_8"; + case Size::Size_16: + return "16"; + case Size::Size_8: + return "8"; + case Size::Size_10_10_10_2: + return "10_10_10_2"; + case Size::Size_11_11_10: + return "11_11_10"; + } + UNREACHABLE(); + return {}; + } + + std::string TypeString() const { + switch (type) { + case Type::SignedNorm: + return "SNORM"; + case Type::UnsignedNorm: + return "UNORM"; + case Type::SignedInt: + return "SINT"; + case Type::UnsignedInt: + return "UINT"; + case Type::UnsignedScaled: + return "USCALED"; + case Type::SignedScaled: + return "SSCALED"; + case Type::Float: + return "FLOAT"; + } + UNREACHABLE(); + return {}; + } + + bool IsNormalized() const { + return (type == Type::SignedNorm) || (type == Type::UnsignedNorm); + } + }; enum class PrimitiveTopology : u32 { Points = 0x0, @@ -167,9 +254,9 @@ public: struct { u32 address_high; u32 address_low; - u32 horiz; - u32 vert; - u32 format; + u32 width; + u32 height; + Tegra::RenderTargetFormat format; u32 block_dimensions; u32 array_mode; u32 layer_stride; @@ -195,6 +282,15 @@ public: }; float depth_range_near; float depth_range_far; + + MathUtil::Rectangle<s32> GetRect() const { + return { + static_cast<s32>(x), // left + static_cast<s32>(y + height), // top + static_cast<s32>(x + width), // right + static_cast<s32>(y) // bottom + }; + }; } viewport[NumViewports]; INSERT_PADDING_WORDS(0x1D); @@ -221,14 +317,7 @@ public: INSERT_PADDING_WORDS(0x5B); - union { - BitField<0, 5, u32> buffer; - BitField<6, 1, u32> constant; - BitField<7, 14, u32> offset; - BitField<21, 6, VertexSize> size; - BitField<27, 3, VertexType> type; - BitField<31, 1, u32> bgra; - } vertex_attrib_format[NumVertexAttributes]; + VertexAttribute vertex_attrib_format[NumVertexAttributes]; INSERT_PADDING_WORDS(0xF); @@ -432,7 +521,7 @@ public: void SubmitMacroCode(u32 entry, std::vector<u32> code); /// Returns a list of enabled textures for the specified shader stage. - std::vector<Texture::TICEntry> GetStageTextures(Regs::ShaderStage stage); + std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const; private: MemoryManager& memory_manager; @@ -444,6 +533,12 @@ private: /// Parameters that have been submitted to the macro call so far. std::vector<u32> macro_params; + /// Retrieves information about a specific TIC entry from the TIC buffer. + Texture::TICEntry GetTICEntry(u32 tic_index) const; + + /// Retrieves information about a specific TSC entry from the TSC buffer. + Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; + /** * Call a macro on this engine. * @param method Method to call diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 8183b12e9..71a8661b4 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -13,7 +13,8 @@ namespace Tegra { -enum class RenderTargetFormat { +enum class RenderTargetFormat : u32 { + NONE = 0x0, RGBA8_UNORM = 0xD5, }; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index a493e1d60..8239f9aad 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -15,8 +15,8 @@ class RasterizerInterface { public: virtual ~RasterizerInterface() {} - /// Draw the current batch of triangles - virtual void DrawTriangles() = 0; + /// Draw the current batch of vertex arrays + virtual void DrawArrays() = 0; /// Notify rasterizer that the specified Maxwell register has been changed virtual void NotifyMaxwellRegisterChanged(u32 id) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 286491b73..911890f16 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -14,11 +14,16 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" +#include "core/hle/kernel/process.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_gen.h" +#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" +using Maxwell = Tegra::Engines::Maxwell3D::Regs; using PixelFormat = SurfaceParams::PixelFormat; using SurfaceType = SurfaceParams::SurfaceType; @@ -120,14 +125,14 @@ RasterizerOpenGL::RasterizerOpenGL() { glBufferData(GL_UNIFORM_BUFFER, sizeof(VSUniformData), nullptr, GL_STREAM_COPY); glBindBufferBase(GL_UNIFORM_BUFFER, 1, vs_uniform_buffer.handle); } else { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } accelerate_draw = AccelDraw::Disabled; glEnable(GL_BLEND); - LOG_WARNING(HW_GPU, "Sync fixed function OpenGL state here when ready"); + LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!"); } RasterizerOpenGL::~RasterizerOpenGL() { @@ -138,47 +143,235 @@ RasterizerOpenGL::~RasterizerOpenGL() { } } -static constexpr std::array<GLenum, 4> vs_attrib_types{ - GL_BYTE, // VertexAttributeFormat::BYTE - GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE - GL_SHORT, // VertexAttributeFormat::SHORT - GL_FLOAT // VertexAttributeFormat::FLOAT -}; - void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + if (is_indexed) { + UNREACHABLE(); + } + + // TODO(bunnei): Add support for 1+ vertex arrays + vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride; } void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VAO); - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + + state.draw.vertex_array = hw_vao.handle; + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + + // TODO(bunnei): Add support for 1+ vertex arrays + const auto& vertex_array{regs.vertex_array[0]}; + ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); + ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); + for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { + ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); + } + + // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. + // Enables the first 16 vertex attributes always, as we don't know which ones are actually used + // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now + // to avoid OpenGL errors. + for (unsigned index = 0; index < 16; ++index) { + auto& attrib = regs.vertex_attrib_format[index]; + glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, + reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); + glEnableVertexAttribArray(index); + hw_vao_enabled_attributes[index] = true; + } + + // Copy vertex array data + const u32 data_size{vertex_array.stride * regs.vertex_buffer.count}; + const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; + res_cache.FlushRegion(data_addr, data_size, nullptr); + Memory::ReadBlock(data_addr, array_ptr, data_size); + + array_ptr += data_size; + buffer_offset += data_size; } void RasterizerOpenGL::SetupVertexShader(VSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_VS); - UNIMPLEMENTED(); + LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); + glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_shader->shader.handle); } void RasterizerOpenGL::SetupFragmentShader(FSUniformData* ub_ptr, GLintptr buffer_offset) { MICROPROFILE_SCOPE(OpenGL_FS); - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { if (!has_ARB_separate_shader_objects) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return false; } accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; - DrawTriangles(); + DrawArrays(); return true; } -void RasterizerOpenGL::DrawTriangles() { +void RasterizerOpenGL::DrawArrays() { + if (accelerate_draw == AccelDraw::Disabled) + return; + MICROPROFILE_SCOPE(OpenGL_Drawing); - UNIMPLEMENTED(); + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + + // TODO(bunnei): Implement these + const bool has_stencil = false; + const bool using_color_fb = true; + const bool using_depth_fb = false; + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()}; + + const bool write_color_fb = + state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || + state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; + + const bool write_depth_fb = + (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || + (has_stencil && state.stencil.test_enabled && state.stencil.write_mask != 0); + + Surface color_surface; + Surface depth_surface; + MathUtil::Rectangle<u32> surfaces_rect; + std::tie(color_surface, depth_surface, surfaces_rect) = + res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect); + + const u16 res_scale = color_surface != nullptr + ? color_surface->res_scale + : (depth_surface == nullptr ? 1u : depth_surface->res_scale); + + MathUtil::Rectangle<u32> draw_rect{ + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + + viewport_rect.left * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Left + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + + viewport_rect.top * res_scale, + surfaces_rect.bottom, surfaces_rect.top)), // Top + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.left) + + viewport_rect.right * res_scale, + surfaces_rect.left, surfaces_rect.right)), // Right + static_cast<u32>(MathUtil::Clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + + viewport_rect.bottom * res_scale, + surfaces_rect.bottom, surfaces_rect.top))}; // Bottom + + // Bind the framebuffer surfaces + BindFramebufferSurfaces(color_surface, depth_surface, has_stencil); + + // Sync the viewport + SyncViewport(surfaces_rect, res_scale); + + // TODO(bunnei): Sync framebuffer_scale uniform here + // TODO(bunnei): Sync scissorbox uniform(s) here + // TODO(bunnei): Sync and bind the texture surfaces + + // Sync and bind the shader + if (shader_dirty) { + SetShader(); + shader_dirty = false; + } + + // Sync the uniform data + if (uniform_block_data.dirty) { + glBufferSubData(GL_UNIFORM_BUFFER, 0, sizeof(UniformData), &uniform_block_data.data); + uniform_block_data.dirty = false; + } + + // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable + // scissor test to prevent drawing outside of the framebuffer region + state.scissor.enabled = true; + state.scissor.x = draw_rect.left; + state.scissor.y = draw_rect.bottom; + state.scissor.width = draw_rect.GetWidth(); + state.scissor.height = draw_rect.GetHeight(); + state.Apply(); + + // Draw the vertex batch + const bool is_indexed = accelerate_draw == AccelDraw::Indexed; + AnalyzeVertexArray(is_indexed); + state.draw.vertex_buffer = stream_buffer->GetHandle(); + state.Apply(); + + size_t buffer_size = static_cast<size_t>(vs_input_size); + if (is_indexed) { + UNREACHABLE(); + } + buffer_size += sizeof(VSUniformData); + + size_t ptr_pos = 0; + u8* buffer_ptr; + GLintptr buffer_offset; + std::tie(buffer_ptr, buffer_offset) = + stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); + + SetupVertexArray(buffer_ptr, buffer_offset); + ptr_pos += vs_input_size; + + GLintptr index_buffer_offset = 0; + if (is_indexed) { + UNREACHABLE(); + } + + SetupVertexShader(reinterpret_cast<VSUniformData*>(&buffer_ptr[ptr_pos]), + buffer_offset + static_cast<GLintptr>(ptr_pos)); + const GLintptr vs_ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); + ptr_pos += sizeof(VSUniformData); + + stream_buffer->Unmap(); + + const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { + if (has_ARB_direct_state_access) { + glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size); + } else { + glBindBuffer(GL_COPY_WRITE_BUFFER, handle); + glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size); + } + }; + + copy_buffer(vs_uniform_buffer.handle, vs_ubo_offset, sizeof(VSUniformData)); + + glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_shader->shader.handle); + + if (is_indexed) { + UNREACHABLE(); + } else { + glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0, + regs.vertex_buffer.count); + } + + // Disable scissor test + state.scissor.enabled = false; + + accelerate_draw = AccelDraw::Disabled; + + // Unbind textures for potential future use as framebuffer attachments + for (auto& texture_unit : state.texture_units) { + texture_unit.texture_2d = 0; + } + state.Apply(); + + // Mark framebuffer surfaces as dirty + MathUtil::Rectangle<u32> draw_rect_unscaled{ + draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, + draw_rect.bottom / res_scale}; + + if (color_surface != nullptr && write_color_fb) { + auto interval = color_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + color_surface); + } + if (depth_surface != nullptr && write_depth_fb) { + auto interval = depth_surface->GetSubRectInterval(draw_rect_unscaled); + res_cache.InvalidateRegion(boost::icl::first(interval), boost::icl::length(interval), + depth_surface); + } } void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {} @@ -206,17 +399,17 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const void* config) { MICROPROFILE_SCOPE(OpenGL_Blits); - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return true; } bool RasterizerOpenGL::AccelerateTextureCopy(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return true; } bool RasterizerOpenGL::AccelerateFill(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return true; } @@ -297,14 +490,14 @@ void main() { return; } - LOG_ERROR(HW_GPU, "Emulated shaders are not supported! Using a passthrough shader."); + LOG_CRITICAL(Render_OpenGL, "Emulated shaders are not supported! Using a passthrough shader."); current_shader = &test_shader; if (has_ARB_separate_shader_objects) { test_shader.shader.Create(vertex_shader, nullptr, fragment_shader, {}, true); glActiveShaderProgram(pipeline.handle, test_shader.shader.handle); } else { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } state.draw.shader_program = test_shader.shader.handle; @@ -316,34 +509,70 @@ void main() { } } +void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface, + const Surface& depth_surface, bool has_stencil) { + state.draw.draw_framebuffer = framebuffer.handle; + state.Apply(); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + color_surface != nullptr ? color_surface->texture.handle : 0, 0); + if (depth_surface != nullptr) { + if (has_stencil) { + // attach both depth and stencil + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + } else { + // attach depth + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + depth_surface->texture.handle, 0); + // clear stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + } + } else { + // clear both depth and stencil attachment + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + } +} + +void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const MathUtil::Rectangle<s32> viewport_rect{regs.viewport[0].GetRect()}; + + state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left * res_scale; + state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom * res_scale; + state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth() * res_scale); + state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight() * res_scale); +} + void RasterizerOpenGL::SyncClipEnabled() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncClipCoef() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncCullMode() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncDepthScale() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncDepthOffset() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendEnabled() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendFuncs() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } void RasterizerOpenGL::SyncBlendColor() { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b387f383b..fd53e94cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -29,7 +29,7 @@ public: RasterizerOpenGL(); ~RasterizerOpenGL() override; - void DrawTriangles() override; + void DrawArrays() override; void NotifyMaxwellRegisterChanged(u32 id) override; void FlushAll() override; void FlushRegion(VAddr addr, u64 size) override; @@ -87,6 +87,13 @@ public: private: struct SamplerInfo {}; + /// Binds the framebuffer color and depth surface + void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface, + bool has_stencil); + + /// Syncs the viewport to match the guest state + void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect, u16 res_scale); + /// Syncs the clip enabled status to match the guest state void SyncClipEnabled(); @@ -139,7 +146,7 @@ private: OGLVertexArray hw_vao; std::array<bool, 16> hw_vao_enabled_attributes; - std::array<SamplerInfo, 3> texture_samplers; + std::array<SamplerInfo, 32> texture_samplers; static constexpr size_t VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; std::unique_ptr<OGLStreamBuffer> vertex_buffer; OGLBuffer uniform_buffer; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 78fa7c051..2ffbd3bab 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -21,10 +21,13 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/vector_math.h" +#include "core/core.h" #include "core/frontend/emu_window.h" +#include "core/hle/kernel/process.h" #include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "core/settings.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/utils.h" @@ -110,65 +113,26 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { template <bool morton_to_gl, PixelFormat format> static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) { constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8; - constexpr u32 tile_size = bytes_per_pixel * 64; - constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); - static_assert(gl_bytes_per_pixel >= bytes_per_pixel, ""); - gl_buffer += gl_bytes_per_pixel - bytes_per_pixel; - - const VAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); - const VAddr aligned_start = base + Common::AlignUp(start - base, tile_size); - const VAddr aligned_end = base + Common::AlignDown(end - base, tile_size); - - ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); - - const u64 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; - u32 x = static_cast<u32>((begin_pixel_index % (stride * 8)) / 8); - u32 y = static_cast<u32>((begin_pixel_index / (stride * 8)) * 8); - - gl_buffer += ((height - 8 - y) * stride + x) * gl_bytes_per_pixel; - - auto glbuf_next_tile = [&] { - x = (x + 8) % stride; - gl_buffer += 8 * gl_bytes_per_pixel; - if (!x) { - y += 8; - gl_buffer -= stride * 9 * gl_bytes_per_pixel; - } - }; - - u8* tile_buffer = Memory::GetPointer(start); - - if (start < aligned_start && !morton_to_gl) { - std::array<u8, tile_size> tmp_buf; - MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], - std::min(aligned_start, end) - start); - - tile_buffer += aligned_start - start; - glbuf_next_tile(); - } - - const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; - while (tile_buffer < buffer_end) { - MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer); - tile_buffer += tile_size; - glbuf_next_tile(); - } - if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { - std::array<u8, tile_size> tmp_buf; - MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer); - std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); - } + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the + // configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); + VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, + Memory::GetPointer(base), gl_buffer, morton_to_gl); } static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> morton_to_gl_fns = { - MortonCopy<true, PixelFormat::RGBA8>, // 0 - MortonCopy<true, PixelFormat::RGB8>, // 1 - MortonCopy<true, PixelFormat::RGB5A1>, // 2 - MortonCopy<true, PixelFormat::RGB565>, // 3 - MortonCopy<true, PixelFormat::RGBA4>, // 4 + MortonCopy<true, PixelFormat::RGBA8>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -177,19 +141,19 @@ static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> mo nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy<true, PixelFormat::D16>, // 14 - nullptr, // 15 - MortonCopy<true, PixelFormat::D24>, // 16 - MortonCopy<true, PixelFormat::D24S8> // 17 }; static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> gl_to_morton_fns = { - MortonCopy<false, PixelFormat::RGBA8>, // 0 - MortonCopy<false, PixelFormat::RGB8>, // 1 - MortonCopy<false, PixelFormat::RGB5A1>, // 2 - MortonCopy<false, PixelFormat::RGB565>, // 3 - MortonCopy<false, PixelFormat::RGBA4>, // 4 + MortonCopy<false, PixelFormat::RGBA8>, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, nullptr, nullptr, nullptr, @@ -198,11 +162,6 @@ static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 18> gl nullptr, nullptr, nullptr, - nullptr, // 5 - 13 - MortonCopy<false, PixelFormat::D16>, // 14 - nullptr, // 15 - MortonCopy<false, PixelFormat::D24>, // 16 - MortonCopy<false, PixelFormat::D24S8> // 17 }; // Allocate an uninitialized texture of appropriate size and format for the surface @@ -291,8 +250,8 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec static bool FillSurface(const Surface& surface, const u8* fill_data, const MathUtil::Rectangle<u32>& fill_rect, GLuint draw_fb_handle) { - ASSERT_MSG(false, "Unimplemented"); - return true; + UNREACHABLE(); + return {}; } SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { @@ -531,7 +490,7 @@ MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64 void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { ASSERT(type != SurfaceType::Fill); - u8* texture_src_data = Memory::GetPointer(addr); + u8* const texture_src_data = Memory::GetPointer(addr); if (texture_src_data == nullptr) return; @@ -548,11 +507,16 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) { if (!is_tiled) { ASSERT(type == SurfaceType::Color); const u32 bytes_per_pixel{GetFormatBpp() >> 3}; + + // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check + // the configuration for this and perform more generic un/swizzle + LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4, texture_src_data + start_offset, &gl_buffer[start_offset], true); } else { - ASSERT_MSG(false, "Unimplemented"); + morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr, + load_start, load_end); } } @@ -1093,18 +1057,106 @@ SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& } Surface RasterizerCacheOpenGL::GetTextureSurface(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return {}; } SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport_rect) { - UNIMPLEMENTED(); - return {}; + bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) { + const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; + const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; + const auto& config = regs.rt[0]; + + // TODO(bunnei): This is hard corded to use just the first render buffer + LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!"); + + // update resolution_scale_factor and reset cache if changed + // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We + // need to fix this before making the renderer multi-threaded. + static u16 resolution_scale_factor = GetResolutionScaleFactor(); + if (resolution_scale_factor != GetResolutionScaleFactor()) { + resolution_scale_factor = GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + } + + MathUtil::Rectangle<u32> viewport_clamped{ + static_cast<u32>(MathUtil::Clamp(viewport.left, 0, static_cast<s32>(config.width))), + static_cast<u32>(MathUtil::Clamp(viewport.top, 0, static_cast<s32>(config.height))), + static_cast<u32>(MathUtil::Clamp(viewport.right, 0, static_cast<s32>(config.width))), + static_cast<u32>(MathUtil::Clamp(viewport.bottom, 0, static_cast<s32>(config.height)))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.width; + color_params.height = config.height; + SurfaceParams depth_params = color_params; + + color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address()); + color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format); + color_params.UpdateParams(); + + ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented"); + // depth_params.addr = config.GetDepthBufferPhysicalAddress(); + // depth_params.pixel_format = SurfaceParams::PixelFormatFromDepthFormat(config.depth_format); + // depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + MathUtil::Rectangle<u32> color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle<u32> depth_rect{}; + Surface depth_surface = nullptr; + if (using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + MathUtil::Rectangle<u32> fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); } Surface RasterizerCacheOpenGL::GetFillSurface(const void* config) { - ASSERT_MSG(false, "Unimplemented"); + UNREACHABLE(); return {}; } @@ -1348,5 +1400,33 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) { } void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - // ASSERT_MSG(false, "Unimplemented"); + const u64 num_pages = + ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1; + const u64 page_start = addr >> Memory::PAGE_BITS; + const u64 page_end = page_start + num_pages; + + // Interval maps will erase segments if count reaches 0, so if delta is negative we have to + // subtract after iterating + const auto pages_interval = PageMap::interval_type::right_open(page_start, page_end); + if (delta > 0) + cached_pages.add({pages_interval, delta}); + + for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { + const auto interval = pair.first & pages_interval; + const int count = pair.second; + + const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const u64 interval_size = interval_end_addr - interval_start_addr; + + if (delta > 0 && count == delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true); + else if (delta < 0 && count == -delta) + Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + else + ASSERT(count >= 0); + } + + if (delta < 0) + cached_pages.add({pages_interval, delta}); } diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 14f3cdc38..1f660d30c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -41,7 +41,7 @@ static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval using SurfaceRect_Tuple = std::tuple<Surface, MathUtil::Rectangle<u32>>; using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, MathUtil::Rectangle<u32>>; -using PageMap = boost::icl::interval_map<u32, int>; +using PageMap = boost::icl::interval_map<u64, int>; enum class ScaleMatch { Exact, // only accept same res scale @@ -116,6 +116,15 @@ struct SurfaceParams { return GetFormatBpp(pixel_format); } + static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) { + switch (format) { + case Tegra::RenderTargetFormat::RGBA8_UNORM: + return PixelFormat::RGBA8; + default: + UNREACHABLE(); + } + } + static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) { switch (format) { case Tegra::FramebufferConfig::PixelFormat::ABGR8: @@ -308,7 +317,7 @@ public: /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, - const MathUtil::Rectangle<s32>& viewport_rect); + const MathUtil::Rectangle<s32>& viewport); /// Get a surface that matches the fill config Surface GetFillSurface(const void* config); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 0e0ef18cc..564ea8f9e 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -26,7 +26,7 @@ public: sanitize_mul(sanitize_mul), emit_cb(emit_cb), setemit_cb(setemit_cb) {} std::string Decompile() { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index f242bce1d..8f3c98800 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -8,12 +8,12 @@ namespace GLShader { std::string GenerateVertexShader(const MaxwellVSConfig& config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } std::string GenerateFragmentShader(const MaxwellFSConfig& config) { - UNIMPLEMENTED(); + UNREACHABLE(); return {}; } diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index a3ba16761..a6c6204d5 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -38,8 +38,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &vertex_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling vertex shader:\n%s", - &vertex_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling vertex shader:\n%s", + &vertex_shader_error[0]); } } } @@ -62,8 +62,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &geometry_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling geometry shader:\n%s", - &geometry_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling geometry shader:\n%s", + &geometry_shader_error[0]); } } } @@ -86,8 +86,8 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &fragment_shader_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error compiling fragment shader:\n%s", - &fragment_shader_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error compiling fragment shader:\n%s", + &fragment_shader_error[0]); } } } @@ -128,20 +128,20 @@ GLuint LoadProgram(const char* vertex_shader, const char* geometry_shader, if (result == GL_TRUE) { LOG_DEBUG(Render_OpenGL, "%s", &program_error[0]); } else { - LOG_ERROR(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); + LOG_CRITICAL(Render_OpenGL, "Error linking shader:\n%s", &program_error[0]); } } // If the program linking failed at least one of the shaders was probably bad if (result == GL_FALSE) { if (vertex_shader) { - LOG_ERROR(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); + LOG_CRITICAL(Render_OpenGL, "Vertex shader:\n%s", vertex_shader); } if (geometry_shader) { - LOG_ERROR(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); + LOG_CRITICAL(Render_OpenGL, "Geometry shader:\n%s", geometry_shader); } if (fragment_shader) { - LOG_ERROR(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); + LOG_CRITICAL(Render_OpenGL, "Fragment shader:\n%s", fragment_shader); } } ASSERT_MSG(result == GL_TRUE, "Shader not linked"); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 940575dfa..c1f4efc8c 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -85,7 +85,7 @@ public: struct { GLuint texture_2d; // GL_TEXTURE_BINDING_2D GLuint sampler; // GL_SAMPLER_BINDING - } texture_units[3]; + } texture_units[32]; struct { GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h new file mode 100644 index 000000000..d847317ac --- /dev/null +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -0,0 +1,50 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <glad/glad.h> +#include "common/common_types.h" +#include "common/logging/log.h" +#include "video_core/engines/maxwell_3d.h" + +namespace MaxwellToGL { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +inline GLenum VertexType(Maxwell::VertexAttribute attrib) { + switch (attrib.type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: { + + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_UNSIGNED_BYTE; + } + + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str()); + UNREACHABLE(); + return {}; + } + + case Maxwell::VertexAttribute::Type::Float: + return GL_FLOAT; + } + + LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str()); + UNREACHABLE(); + return {}; +} + +inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::TriangleStrip: + return GL_TRIANGLE_STRIP; + } + LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology); + UNREACHABLE(); + return {}; +} + +} // namespace MaxwellToGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1a24855d7..78b50b227 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -100,6 +100,8 @@ RendererOpenGL::~RendererOpenGL() = default; /// Swap buffers (render frame) void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) { + Core::System::GetInstance().perf_stats.EndSystemFrame(); + // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.Apply(); @@ -114,20 +116,19 @@ void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig& // performance problem. ConfigureFramebufferTexture(screen_info.texture, *framebuffer); } + + // Load the framebuffer from memory, draw it to the screen, and swap buffers LoadFBToScreenInfo(*framebuffer, screen_info); + DrawScreen(); + render_window->SwapBuffers(); } - DrawScreens(); - - Core::System::GetInstance().perf_stats.EndSystemFrame(); - - // Swap buffers render_window->PollEvents(); - render_window->SwapBuffers(); Core::System::GetInstance().frame_limiter.DoFrameLimiting(CoreTiming::GetGlobalTimeUs()); Core::System::GetInstance().perf_stats.BeginSystemFrame(); + // Restore the rasterizer state prev_state.Apply(); RefreshRasterizerSetting(); } @@ -141,11 +142,6 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - // TODO(bunnei): The framebuffer region should only be invalidated if it is written to, not - // every frame. When we find the right place for this, the below line can be removed. - Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, - Memory::FlushMode::Invalidate); - // Framebuffer orientation handling framebuffer_transform_flags = framebuffer.transform_flags; @@ -283,7 +279,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, gl_framebuffer_data.resize(texture.width * texture.height * 4); break; default: - UNIMPLEMENTED(); + UNREACHABLE(); } state.texture_units[0].texture_2d = texture.resource.handle; @@ -297,8 +293,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, state.Apply(); } -void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, - float h) { +void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, + float h) { const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; auto right = texcoords.right; @@ -309,7 +305,7 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl right = texcoords.left; } else { // Other transformations are unsupported - LOG_CRITICAL(HW_GPU, "unsupported framebuffer_transform_flags=%d", + LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags=%d", framebuffer_transform_flags); UNIMPLEMENTED(); } @@ -334,7 +330,7 @@ void RendererOpenGL::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl /** * Draws the emulated screens to the emulator window. */ -void RendererOpenGL::DrawScreens() { +void RendererOpenGL::DrawScreen() { const auto& layout = render_window->GetFramebufferLayout(); const auto& screen = layout.screen; @@ -350,8 +346,8 @@ void RendererOpenGL::DrawScreens() { glActiveTexture(GL_TEXTURE0); glUniform1i(uniform_color_texture, 0); - DrawSingleScreen(screen_info, (float)screen.left, (float)screen.top, (float)screen.GetWidth(), - (float)screen.GetHeight()); + DrawScreenTriangles(screen_info, (float)screen.left, (float)screen.top, + (float)screen.GetWidth(), (float)screen.GetHeight()); m_current_frame++; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 29516baf4..fffd0f9f4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,8 +55,8 @@ private: void InitOpenGLObjects(); void ConfigureFramebufferTexture(TextureInfo& texture, const Tegra::FramebufferConfig& framebuffer); - void DrawScreens(); - void DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h); + void DrawScreen(); + void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); void UpdateFramerate(); // Loads framebuffer from emulated memory into the display information structure diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index d969bcdd9..07936f8a3 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -17,11 +17,32 @@ enum class TextureFormat : u32 { DXT1 = 0x24, }; +enum class TextureType : u32 { + Texture1D = 0, + Texture2D = 1, + Texture3D = 2, + TextureCubemap = 3, + Texture1DArray = 4, + Texture2DArray = 5, + Texture1DBuffer = 6, + Texture2DNoMipmap = 7, + TextureCubeArray = 8, +}; + +enum class TICHeaderVersion : u32 { + OneDBuffer = 0, + PitchColorKey = 1, + Pitch = 2, + BlockLinear = 3, + BlockLinearColorKey = 4, +}; + union TextureHandle { u32 raw; BitField<0, 20, u32> tic_id; BitField<20, 12, u32> tsc_id; }; +static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); struct TICEntry { union { @@ -33,10 +54,15 @@ struct TICEntry { BitField<16, 3, u32> a_type; }; u32 address_low; - u16 address_high; - INSERT_PADDING_BYTES(6); - u16 width_minus_1; - INSERT_PADDING_BYTES(2); + union { + BitField<0, 16, u32> address_high; + BitField<21, 3, TICHeaderVersion> header_version; + }; + INSERT_PADDING_BYTES(4); + union { + BitField<0, 16, u32> width_minus_1; + BitField<23, 4, TextureType> texture_type; + }; u16 height_minus_1; INSERT_PADDING_BYTES(10); @@ -54,6 +80,56 @@ struct TICEntry { }; static_assert(sizeof(TICEntry) == 0x20, "TICEntry has wrong size"); +enum class WrapMode : u32 { + Wrap = 0, + Mirror = 1, + ClampToEdge = 2, + Border = 3, + ClampOGL = 4, + MirrorOnceClampToEdge = 5, + MirrorOnceBorder = 6, + MirrorOnceClampOGL = 7, +}; + +enum class TextureFilter : u32 { + Nearest = 1, + Linear = 2, +}; + +enum class TextureMipmapFilter : u32 { + None = 1, + Nearest = 2, + Linear = 3, +}; + +struct TSCEntry { + union { + BitField<0, 3, WrapMode> wrap_u; + BitField<3, 3, WrapMode> wrap_v; + BitField<6, 3, WrapMode> wrap_p; + BitField<9, 1, u32> depth_compare_enabled; + BitField<10, 3, u32> depth_compare_func; + }; + union { + BitField<0, 2, TextureFilter> mag_filter; + BitField<4, 2, TextureFilter> min_filter; + BitField<6, 2, TextureMipmapFilter> mip_filter; + }; + INSERT_PADDING_BYTES(8); + u32 border_color_r; + u32 border_color_g; + u32 border_color_b; + u32 border_color_a; +}; +static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); + +struct FullTextureInfo { + u32 index; + TICEntry tic; + TSCEntry tsc; + bool enabled; +}; + /// Returns the number of bytes per pixel of the input texture format. u32 BytesPerPixel(TextureFormat format); diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 864691baa..289140f31 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -26,7 +26,7 @@ bool Init(EmuWindow* emu_window) { if (g_renderer->Init()) { LOG_DEBUG(Render, "initialized OK"); } else { - LOG_ERROR(Render, "initialization failed !"); + LOG_CRITICAL(Render, "initialization failed !"); return false; } return true; diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp index 8e6509adc..1e4844b57 100644 --- a/src/yuzu/debugger/graphics/graphics_surface.cpp +++ b/src/yuzu/debugger/graphics/graphics_surface.cpp @@ -339,11 +339,10 @@ void GraphicsSurfaceWidget::OnUpdate() { static_cast<size_t>(Source::RenderTarget0)]; surface_address = rt.Address(); - surface_width = rt.horiz; - surface_height = rt.vert; - if (rt.format != 0) { - surface_format = - ConvertToTextureFormat(static_cast<Tegra::RenderTargetFormat>(rt.format)); + surface_width = rt.width; + surface_height = rt.height; + if (rt.format != Tegra::RenderTargetFormat::NONE) { + surface_format = ConvertToTextureFormat(rt.format); } break; |