diff options
Diffstat (limited to 'src/video_core')
68 files changed, 3538 insertions, 1094 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 91df062d7..f7febd6a2 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -148,6 +148,7 @@ add_library(video_core STATIC textures/convert.h textures/decoders.cpp textures/decoders.h + textures/texture.cpp textures/texture.h video_core.cpp video_core.h @@ -210,6 +211,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + renderer_vulkan/wrapper.cpp + renderer_vulkan/wrapper.h ) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index 4b9193182..e35ee0b67 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -15,37 +15,29 @@ namespace VideoCommon { class BufferBlock { public: - bool Overlaps(const CacheAddr start, const CacheAddr end) const { - return (cache_addr < end) && (cache_addr_end > start); + bool Overlaps(const VAddr start, const VAddr end) const { + return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { - return cache_addr <= other_start && other_end <= cache_addr_end; + bool IsInside(const VAddr other_start, const VAddr other_end) const { + return cpu_addr <= other_start && other_end <= cpu_addr_end; } - u8* GetWritableHostPtr() const { - return FromCacheAddr(cache_addr); + std::size_t GetOffset(const VAddr in_addr) { + return static_cast<std::size_t>(in_addr - cpu_addr); } - u8* GetWritableHostPtr(std::size_t offset) const { - return FromCacheAddr(cache_addr + offset); + VAddr GetCpuAddr() const { + return cpu_addr; } - std::size_t GetOffset(const CacheAddr in_addr) { - return static_cast<std::size_t>(in_addr - cache_addr); + VAddr GetCpuAddrEnd() const { + return cpu_addr_end; } - CacheAddr GetCacheAddr() const { - return cache_addr; - } - - CacheAddr GetCacheAddrEnd() const { - return cache_addr_end; - } - - void SetCacheAddr(const CacheAddr new_addr) { - cache_addr = new_addr; - cache_addr_end = new_addr + size; + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; + cpu_addr_end = new_addr + size; } std::size_t GetSize() const { @@ -61,14 +53,14 @@ public: } protected: - explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { - SetCacheAddr(cache_addr); + explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} { + SetCpuAddr(cpu_addr); } ~BufferBlock() = default; private: - CacheAddr cache_addr{}; - CacheAddr cache_addr_end{}; + VAddr cpu_addr{}; + VAddr cpu_addr_end{}; std::size_t size{}; u64 epoch{}; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 186aca61d..b57c0d4d4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -19,6 +19,7 @@ #include "common/alignment.h" #include "common/common_types.h" #include "core/core.h" +#include "core/memory.h" #include "video_core/buffer_cache/buffer_block.h" #include "video_core/buffer_cache/map_interval.h" #include "video_core/memory_manager.h" @@ -37,28 +38,45 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { + const std::optional<VAddr> cpu_addr_opt = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + + if (!cpu_addr_opt) { return {GetEmptyBuffer(size), 0}; } - const auto cache_addr = ToCacheAddr(host_ptr); + + VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { - if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { + if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { + auto& memory_manager = system.GPU().MemoryManager(); if (use_fast_cbuf) { - return ConstBufferUpload(host_ptr, size); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + return ConstBufferUpload(host_ptr, size); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + return ConstBufferUpload(staging_buffer.data(), size); + } } else { - return StreamBufferUpload(host_ptr, size, alignment); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + return StreamBufferUpload(host_ptr, size, alignment); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + return StreamBufferUpload(staging_buffer.data(), size, alignment); + } } } } - auto block = GetBlock(cache_addr, size); - auto map = MapAddress(block, gpu_addr, cache_addr, size); + auto block = GetBlock(cpu_addr, size); + auto map = MapAddress(block, gpu_addr, cpu_addr, size); if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); if (!map->IsWritten()) { @@ -71,7 +89,7 @@ public: } } - const u64 offset = static_cast<u64>(block->GetOffset(cache_addr)); + const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); return {ToHandle(block), offset}; } @@ -112,7 +130,7 @@ public: } /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -127,7 +145,7 @@ public: } /// Mark the specified region as being invalidated - void InvalidateRegion(CacheAddr addr, u64 size) { + void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; std::vector<MapInterval> objects = GetMapsInRange(addr, size); @@ -152,7 +170,7 @@ protected: virtual void WriteBarrier() = 0; - virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; + virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; @@ -169,20 +187,17 @@ protected: /// Register an object into the cache void Register(const MapInterval& new_map, bool inherit_written = false) { - const CacheAddr cache_ptr = new_map->GetStart(); - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); - if (!cache_ptr || !cpu_addr) { + const VAddr cpu_addr = new_map->GetStart(); + if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", new_map->GetGpuAddress()); return; } const std::size_t size = new_map->GetEnd() - new_map->GetStart(); - new_map->SetCpuAddress(*cpu_addr); new_map->MarkAsRegistered(true); const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; mapped_addresses.insert({interval, new_map}); - rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); if (inherit_written) { MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); new_map->MarkAsWritten(true); @@ -192,7 +207,7 @@ protected: /// Unregisters an object from the cache void Unregister(MapInterval& map) { const std::size_t size = map->GetEnd() - map->GetStart(); - rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); + rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); map->MarkAsRegistered(false); if (map->IsWritten()) { UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); @@ -202,32 +217,39 @@ protected: } private: - MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { + MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { return std::make_shared<MapIntervalBase>(start, end, gpu_addr); } - MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, - const CacheAddr cache_addr, const std::size_t size) { + MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, + const std::size_t size) { - std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size); + std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { - const CacheAddr cache_addr_end = cache_addr + size; - MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); - u8* host_ptr = FromCacheAddr(cache_addr); - UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); + auto& memory_manager = system.GPU().MemoryManager(); + const VAddr cpu_addr_end = cpu_addr + size; + MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); + if (memory_manager.IsGranularRange(gpu_addr, size)) { + u8* host_ptr = memory_manager.GetPointer(gpu_addr); + UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); + } else { + staging_buffer.resize(size); + memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); + } Register(new_map); return new_map; } - const CacheAddr cache_addr_end = cache_addr + size; + const VAddr cpu_addr_end = cpu_addr + size; if (overlaps.size() == 1) { MapInterval& current_map = overlaps[0]; - if (current_map->IsInside(cache_addr, cache_addr_end)) { + if (current_map->IsInside(cpu_addr, cpu_addr_end)) { return current_map; } } - CacheAddr new_start = cache_addr; - CacheAddr new_end = cache_addr_end; + VAddr new_start = cpu_addr; + VAddr new_end = cpu_addr_end; bool write_inheritance = false; bool modified_inheritance = false; // Calculate new buffer parameters @@ -237,7 +259,7 @@ private: write_inheritance |= overlap->IsWritten(); modified_inheritance |= overlap->IsModified(); } - GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; + GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; for (auto& overlap : overlaps) { Unregister(overlap); } @@ -250,7 +272,7 @@ private: return new_map; } - void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, + void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, std::vector<MapInterval>& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; @@ -262,13 +284,15 @@ private: for (auto& interval : interval_set) { std::size_t size = interval.upper() - interval.lower(); if (size > 0) { - u8* host_ptr = FromCacheAddr(interval.lower()); - UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); + staging_buffer.resize(size); + system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + UploadBlockData(block, block->GetOffset(interval.lower()), size, + staging_buffer.data()); } } } - std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) { + std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) { if (size == 0) { return {}; } @@ -290,8 +314,9 @@ private: void FlushMap(MapInterval map) { std::size_t size = map->GetEnd() - map->GetStart(); TBuffer block = blocks[map->GetStart() >> block_page_bits]; - u8* host_ptr = FromCacheAddr(map->GetStart()); - DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); + staging_buffer.resize(size); + DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); + system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -316,14 +341,14 @@ private: TBuffer EnlargeBlock(TBuffer buffer) { const std::size_t old_size = buffer->GetSize(); const std::size_t new_size = old_size + block_page_size; - const CacheAddr cache_addr = buffer->GetCacheAddr(); - TBuffer new_buffer = CreateBlock(cache_addr, new_size); + const VAddr cpu_addr = buffer->GetCpuAddr(); + TBuffer new_buffer = CreateBlock(cpu_addr, new_size); CopyBlock(buffer, new_buffer, 0, 0, old_size); buffer->SetEpoch(epoch); pending_destruction.push_back(buffer); - const CacheAddr cache_addr_end = cache_addr + new_size - 1; - u64 page_start = cache_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const VAddr cpu_addr_end = cpu_addr + new_size - 1; + u64 page_start = cpu_addr >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { blocks[page_start] = new_buffer; ++page_start; @@ -334,9 +359,9 @@ private: TBuffer MergeBlocks(TBuffer first, TBuffer second) { const std::size_t size_1 = first->GetSize(); const std::size_t size_2 = second->GetSize(); - const CacheAddr first_addr = first->GetCacheAddr(); - const CacheAddr second_addr = second->GetCacheAddr(); - const CacheAddr new_addr = std::min(first_addr, second_addr); + const VAddr first_addr = first->GetCpuAddr(); + const VAddr second_addr = second->GetCpuAddr(); + const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; TBuffer new_buffer = CreateBlock(new_addr, new_size); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); @@ -345,9 +370,9 @@ private: second->SetEpoch(epoch); pending_destruction.push_back(first); pending_destruction.push_back(second); - const CacheAddr cache_addr_end = new_addr + new_size - 1; + const VAddr cpu_addr_end = new_addr + new_size - 1; u64 page_start = new_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { blocks[page_start] = new_buffer; ++page_start; @@ -355,18 +380,18 @@ private: return new_buffer; } - TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { + TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { TBuffer found{}; - const CacheAddr cache_addr_end = cache_addr + size - 1; - u64 page_start = cache_addr >> block_page_bits; - const u64 page_end = cache_addr_end >> block_page_bits; + const VAddr cpu_addr_end = cpu_addr + size - 1; + u64 page_start = cpu_addr >> block_page_bits; + const u64 page_end = cpu_addr_end >> block_page_bits; while (page_start <= page_end) { auto it = blocks.find(page_start); if (it == blocks.end()) { if (found) { found = EnlargeBlock(found); } else { - const CacheAddr start_addr = (page_start << block_page_bits); + const VAddr start_addr = (page_start << block_page_bits); found = CreateBlock(start_addr, block_page_size); blocks[page_start] = found; } @@ -386,7 +411,7 @@ private: return found; } - void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + void MarkRegionAsWritten(const VAddr start, const VAddr end) { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -400,7 +425,7 @@ private: } } - void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + void UnmarkRegionAsWritten(const VAddr start, const VAddr end) { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -416,7 +441,7 @@ private: } } - bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { + bool IsRegionWritten(const VAddr start, const VAddr end) const { u64 page_start = start >> write_page_bit; const u64 page_end = end >> write_page_bit; while (page_start <= page_end) { @@ -440,8 +465,8 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - using IntervalSet = boost::icl::interval_set<CacheAddr>; - using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>; + using IntervalSet = boost::icl::interval_set<VAddr>; + using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>; using IntervalType = typename IntervalCache::interval_type; IntervalCache mapped_addresses; @@ -456,6 +481,8 @@ private: u64 epoch = 0; u64 modified_ticks = 0; + std::vector<u8> staging_buffer; + std::recursive_mutex mutex; }; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 3a104d5cd..b0956029d 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -11,7 +11,7 @@ namespace VideoCommon { class MapIntervalBase { public: - MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) + MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) : start{start}, end{end}, gpu_addr{gpu_addr} {} void SetCpuAddress(VAddr new_cpu_addr) { @@ -26,7 +26,7 @@ public: return gpu_addr; } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { + bool IsInside(const VAddr other_start, const VAddr other_end) const { return (start <= other_start && other_end <= end); } @@ -46,11 +46,11 @@ public: return is_registered; } - CacheAddr GetStart() const { + VAddr GetStart() const { return start; } - CacheAddr GetEnd() const { + VAddr GetEnd() const { return end; } @@ -76,8 +76,8 @@ public: } private: - CacheAddr start; - CacheAddr end; + VAddr start; + VAddr end; GPUVAddr gpu_addr; VAddr cpu_addr{}; bool is_written{}; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index d24c9f657..4637ddabd 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -312,6 +312,35 @@ public: } }; + struct MsaaSampleLocation { + union { + BitField<0, 4, u32> x0; + BitField<4, 4, u32> y0; + BitField<8, 4, u32> x1; + BitField<12, 4, u32> y1; + BitField<16, 4, u32> x2; + BitField<20, 4, u32> y2; + BitField<24, 4, u32> x3; + BitField<28, 4, u32> y3; + }; + + constexpr std::pair<u32, u32> Location(int index) const { + switch (index) { + case 0: + return {x0, y0}; + case 1: + return {x1, y1}; + case 2: + return {x2, y2}; + case 3: + return {x3, y3}; + default: + UNREACHABLE(); + return {0, 0}; + } + } + }; + enum class DepthMode : u32 { MinusOneToOne = 0, ZeroToOne = 1, @@ -793,7 +822,13 @@ public: u32 rt_separate_frag_data; - INSERT_UNION_PADDING_WORDS(0xC); + INSERT_UNION_PADDING_WORDS(0x1); + + u32 multisample_raster_enable; + u32 multisample_raster_samples; + std::array<u32, 4> multisample_sample_mask; + + INSERT_UNION_PADDING_WORDS(0x5); struct { u32 address_high; @@ -830,7 +865,16 @@ public: std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format; - INSERT_UNION_PADDING_WORDS(0xF); + std::array<MsaaSampleLocation, 4> multisample_sample_locations; + + INSERT_UNION_PADDING_WORDS(0x2); + + union { + BitField<0, 1, u32> enable; + BitField<4, 3, u32> target; + } multisample_coverage_to_color; + + INSERT_UNION_PADDING_WORDS(0x8); struct { union { @@ -943,7 +987,7 @@ public: CounterReset counter_reset; - INSERT_UNION_PADDING_WORDS(0x1); + u32 multisample_enable; u32 zeta_enable; @@ -1007,7 +1051,11 @@ public: float polygon_offset_units; - INSERT_UNION_PADDING_WORDS(0x11); + INSERT_UNION_PADDING_WORDS(0x4); + + Tegra::Texture::MsaaMode multisample_mode; + + INSERT_UNION_PADDING_WORDS(0xC); union { BitField<2, 1, u32> coord_origin; @@ -1507,12 +1555,17 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5); ASSERT_REG_POSITION(stencil_back_mask, 0x3D6); ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7); ASSERT_REG_POSITION(color_mask_common, 0x3E4); -ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); ASSERT_REG_POSITION(depth_bounds, 0x3E7); +ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB); +ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED); +ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE); +ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF); ASSERT_REG_POSITION(zeta, 0x3F8); ASSERT_REG_POSITION(clear_flags, 0x43E); ASSERT_REG_POSITION(fill_rectangle, 0x44F); ASSERT_REG_POSITION(vertex_attrib_format, 0x458); +ASSERT_REG_POSITION(multisample_sample_locations, 0x478); +ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E); ASSERT_REG_POSITION(rt_control, 0x487); ASSERT_REG_POSITION(zeta_width, 0x48a); ASSERT_REG_POSITION(zeta_height, 0x48b); @@ -1545,11 +1598,12 @@ ASSERT_REG_POSITION(samplecnt_enable, 0x545); ASSERT_REG_POSITION(point_size, 0x546); ASSERT_REG_POSITION(point_sprite_enable, 0x548); ASSERT_REG_POSITION(counter_reset, 0x54C); +ASSERT_REG_POSITION(multisample_enable, 0x54D); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); ASSERT_REG_POSITION(tsc, 0x557); -ASSERT_REG_POSITION(polygon_offset_factor, 0x55b); +ASSERT_REG_POSITION(polygon_offset_factor, 0x55B); ASSERT_REG_POSITION(tic, 0x55D); ASSERT_REG_POSITION(stencil_two_side_enable, 0x565); ASSERT_REG_POSITION(stencil_back_op_fail, 0x566); @@ -1558,6 +1612,7 @@ ASSERT_REG_POSITION(stencil_back_op_zpass, 0x568); ASSERT_REG_POSITION(stencil_back_func_func, 0x569); ASSERT_REG_POSITION(framebuffer_srgb, 0x56E); ASSERT_REG_POSITION(polygon_offset_units, 0x56F); +ASSERT_REG_POSITION(multisample_mode, 0x574); ASSERT_REG_POSITION(point_coord_replace, 0x581); ASSERT_REG_POSITION(code_address, 0x582); ASSERT_REG_POSITION(draw, 0x585); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7400e1aa9..c66c66f6c 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -231,18 +231,6 @@ enum class AtomicOp : u64 { Or = 6, Xor = 7, Exch = 8, -}; - -enum class GlobalAtomicOp : u64 { - Add = 0, - Min = 1, - Max = 2, - Inc = 3, - Dec = 4, - And = 5, - Or = 6, - Xor = 7, - Exch = 8, SafeAdd = 10, }; @@ -1018,7 +1006,7 @@ union Instruction { } stg; union { - BitField<52, 4, GlobalAtomicOp> operation; + BitField<52, 4, AtomicOp> operation; BitField<49, 3, GlobalAtomicType> type; BitField<28, 20, s64> offset; } atom; @@ -1777,6 +1765,7 @@ public: BRK, DEPBAR, VOTE, + VOTE_VTG, SHFL, FSWZADD, BFE_C, @@ -1823,6 +1812,7 @@ public: IPA, OUT_R, // Emit vertex/primitive ISBERD, + BAR, MEMBAR, VMAD, VSETP, @@ -1908,7 +1898,7 @@ public: MOV_C, MOV_R, MOV_IMM, - MOV_SYS, + S2R, MOV32_IMM, SHL_C, SHL_R, @@ -2092,6 +2082,7 @@ private: INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), + INST("0101000011100---", Id::VOTE_VTG, Type::Warp, "VOTE_VTG"), INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), @@ -2129,6 +2120,7 @@ private: INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), INST("1110111111010---", Id::ISBERD, Type::Trivial, "ISBERD"), + INST("1111000010101---", Id::BAR, Type::Trivial, "BAR"), INST("1110111110011---", Id::MEMBAR, Type::Trivial, "MEMBAR"), INST("01011111--------", Id::VMAD, Type::Video, "VMAD"), INST("0101000011110---", Id::VSETP, Type::Video, "VSETP"), @@ -2201,7 +2193,7 @@ private: INST("0100110010011---", Id::MOV_C, Type::Arithmetic, "MOV_C"), INST("0101110010011---", Id::MOV_R, Type::Arithmetic, "MOV_R"), INST("0011100-10011---", Id::MOV_IMM, Type::Arithmetic, "MOV_IMM"), - INST("1111000011001---", Id::MOV_SYS, Type::Trivial, "MOV_SYS"), + INST("1111000011001---", Id::S2R, Type::Trivial, "S2R"), INST("000000010000----", Id::MOV32_IMM, Type::ArithmeticImmediate, "MOV32_IMM"), INST("0100110001100---", Id::FMNMX_C, Type::Arithmetic, "FMNMX_C"), INST("0101110001100---", Id::FMNMX_R, Type::Arithmetic, "FMNMX_R"), diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index bc80661d8..72e2a33d5 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -4,6 +4,9 @@ #pragma once +#include <array> +#include <optional> + #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" @@ -16,7 +19,7 @@ enum class OutputTopology : u32 { TriangleStrip = 7, }; -enum class AttributeUse : u8 { +enum class PixelImap : u8 { Unused = 0, Constant = 1, Perspective = 2, @@ -24,7 +27,7 @@ enum class AttributeUse : u8 { }; // Documentation in: -// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture +// http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html struct Header { union { BitField<0, 5, u32> sph_type; @@ -59,8 +62,8 @@ struct Header { union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. - BitField<24, 4, u32> reserved; - BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders. + BitField<20, 4, u32> reserved; + BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. } common4{}; union { @@ -93,17 +96,20 @@ struct Header { struct { INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB + union { - BitField<0, 2, AttributeUse> x; - BitField<2, 2, AttributeUse> y; - BitField<4, 2, AttributeUse> w; - BitField<6, 2, AttributeUse> z; + BitField<0, 2, PixelImap> x; + BitField<2, 2, PixelImap> y; + BitField<4, 2, PixelImap> z; + BitField<6, 2, PixelImap> w; u8 raw; } imap_generic_vector[32]; + INSERT_UNION_PADDING_BYTES(2); // ImapColor INSERT_UNION_PADDING_BYTES(2); // ImapSystemValuesC INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10] INSERT_UNION_PADDING_BYTES(2); // ImapReserved + struct { u32 target; union { @@ -112,31 +118,30 @@ struct Header { BitField<2, 30, u32> reserved; }; } omap; + bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const { const u32 bit = render_target * 4 + component; return omap.target & (1 << bit); } - AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const { - return static_cast<AttributeUse>( - (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03); - } - AttributeUse GetAttributeUse(u32 attribute) const { - AttributeUse result = AttributeUse::Unused; - for (u32 i = 0; i < 4; i++) { - const auto index = GetAttributeIndexUse(attribute, i); - if (index == AttributeUse::Unused) { - continue; - } - if (result == AttributeUse::Unused || result == index) { - result = index; + + PixelImap GetPixelImap(u32 attribute) const { + const auto get_index = [this, attribute](u32 index) { + return static_cast<PixelImap>( + (imap_generic_vector[attribute].raw >> (index * 2)) & 3); + }; + + std::optional<PixelImap> result; + for (u32 component = 0; component < 4; ++component) { + const PixelImap index = get_index(component); + if (index == PixelImap::Unused) { continue; } - LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode"); - if (index == AttributeUse::Perspective) { - result = index; + if (result && result != index) { + LOG_CRITICAL(HW_GPU, "Generic attribute conflict in interpolation mode"); } + result = index; } - return result; + return result.value_or(PixelImap::Unused); } } ps; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index e8f763ce9..8acf2eda2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -7,6 +7,7 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/core_timing_util.h" +#include "core/frontend/emu_window.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -16,14 +17,15 @@ #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" +#include "video_core/video_core.h" namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) - : system{system}, renderer{renderer}, is_async{is_async} { - auto& rasterizer{renderer.Rasterizer()}; +GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) + : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { + auto& rasterizer{renderer->Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); @@ -137,7 +139,7 @@ u64 GPU::GetTicks() const { } void GPU::FlushCommands() { - renderer.Rasterizer().FlushCommands(); + renderer->Rasterizer().FlushCommands(); } // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 64acb17df..1a2d747be 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -25,8 +25,11 @@ inline u8* FromCacheAddr(CacheAddr cache_addr) { } namespace Core { -class System; +namespace Frontend { +class EmuWindow; } +class System; +} // namespace Core namespace VideoCore { class RendererBase; @@ -129,7 +132,8 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async); + explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + bool is_async); virtual ~GPU(); @@ -174,6 +178,14 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + VideoCore::RendererBase& Renderer() { + return *renderer; + } + + const VideoCore::RendererBase& Renderer() const { + return *renderer; + } + // Waits for the GPU to finish working virtual void WaitIdle() const = 0; @@ -258,13 +270,13 @@ public: virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; protected: virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0; @@ -287,7 +299,7 @@ private: protected: std::unique_ptr<Tegra::DmaPusher> dma_pusher; Core::System& system; - VideoCore::RendererBase& renderer; + std::unique_ptr<VideoCore::RendererBase> renderer; private: std::unique_ptr<Tegra::MemoryManager> memory_manager; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 04222d060..cc434faf7 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,13 +10,16 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer, true), gpu_thread{system} {} +GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context) + : GPU(system, std::move(renderer_), true), gpu_thread{system}, gpu_context(std::move(context)), + cpu_context(renderer->GetRenderWindow().CreateSharedContext()) {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(renderer, *dma_pusher); + cpu_context->MakeCurrent(); + gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); } void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { @@ -27,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { gpu_thread.SwapBuffers(framebuffer); } -void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { gpu_thread.FlushRegion(addr, size); } -void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { gpu_thread.InvalidateRegion(addr, size); } -void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { gpu_thread.FlushAndInvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 1241ade1d..03fd0eef0 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -7,6 +7,10 @@ #include "video_core/gpu.h" #include "video_core/gpu_thread.h" +namespace Core::Frontend { +class GraphicsContext; +} + namespace VideoCore { class RendererBase; } // namespace VideoCore @@ -16,15 +20,16 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context); ~GPUAsynch() override; void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override; protected: @@ -32,6 +37,8 @@ protected: private: GPUThread::ThreadManager gpu_thread; + std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; + std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index d48221077..6f38a672a 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,12 +7,15 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) - : GPU(system, renderer, false) {} +GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context) + : GPU(system, std::move(renderer), false), context{std::move(context)} {} GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() {} +void GPUSynch::Start() { + context->MakeCurrent(); +} void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); @@ -20,19 +23,19 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { } void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - renderer.SwapBuffers(framebuffer); + renderer->SwapBuffers(framebuffer); } -void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().FlushRegion(addr, size); +void GPUSynch::FlushRegion(VAddr addr, u64 size) { + renderer->Rasterizer().FlushRegion(addr, size); } -void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().InvalidateRegion(addr, size); +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { + renderer->Rasterizer().InvalidateRegion(addr, size); } -void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { - renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + renderer->Rasterizer().FlushAndInvalidateRegion(addr, size); } } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index c71baee89..4a6e9a01d 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -6,6 +6,10 @@ #include "video_core/gpu.h" +namespace Core::Frontend { +class GraphicsContext; +} + namespace VideoCore { class RendererBase; } // namespace VideoCore @@ -15,20 +19,24 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, + std::unique_ptr<Core::Frontend::GraphicsContext>&& context); ~GPUSynch() override; void Start() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, [[maybe_unused]] u32 value) const override {} + +private: + std::unique_ptr<Core::Frontend::GraphicsContext> context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index b1088af3d..10cda686b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/frontend/scope_acquire_context.h" +#include "core/frontend/emu_window.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" @@ -14,8 +14,8 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread -static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, - SynchState& state) { +static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher, SynchState& state) { MicroProfileOnThreadCreate("GpuThread"); // Wait for first GPU command before acquiring the window context @@ -27,7 +27,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p return; } - Core::Frontend::ScopeAcquireContext acquire_context{renderer.GetRenderWindow()}; + auto current_context = context.Acquire(); CommandDataContainer next; while (state.is_running) { @@ -62,8 +62,11 @@ ThreadManager::~ThreadManager() { thread.join(); } -void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { - thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; +void ThreadManager::StartThread(VideoCore::RendererBase& renderer, + Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher) { + thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), + std::ref(state)}; } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { @@ -74,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); } -void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushRegion(VAddr addr, u64 size) { PushCommand(FlushRegionCommand(addr, size)); } -void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { system.Renderer().Rasterizer().InvalidateRegion(addr, size); } -void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important InvalidateRegion(addr, size); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 882e2d9c7..cd74ad330 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -10,7 +10,6 @@ #include <optional> #include <thread> #include <variant> - #include "common/threadsafe_queue.h" #include "video_core/gpu.h" @@ -20,6 +19,9 @@ class DmaPusher; } // namespace Tegra namespace Core { +namespace Frontend { +class GraphicsContext; +} class System; } // namespace Core @@ -45,26 +47,26 @@ struct SwapBuffersCommand final { /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { - explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; /// Command to signal to the GPU thread to invalidate a region struct InvalidateRegionCommand final { - explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; /// Command to signal to the GPU thread to flush and invalidate a region struct FlushAndInvalidateRegionCommand final { - explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} - CacheAddr addr; + VAddr addr; u64 size; }; @@ -99,7 +101,8 @@ public: ~ThreadManager(); /// Creates and starts the GPU thread. - void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher); /// Push GPU command entries to be processed void SubmitList(Tegra::CommandList&& entries); @@ -108,13 +111,13 @@ public: void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(CacheAddr addr, u64 size); + void FlushRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be invalidated - void InvalidateRegion(CacheAddr addr, u64 size); + void InvalidateRegion(VAddr addr, u64 size); /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + void FlushAndInvalidateRegion(VAddr addr, u64 size); // Wait until the gpu thread is idle. void WaitIdle() const; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index f5d33f27a..a3389d0d2 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { ASSERT((gpu_addr & page_mask) == 0); const u64 aligned_size{Common::AlignUp(size, page_size)}; - const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))}; const auto cpu_addr = GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); // Flush and invalidate through the GPU interface, to be asynchronous if possible. - system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size); + system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size); UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() @@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const { return {}; } - const u8* page_pointer{page_table.pointers[addr >> page_bits]}; + const u8* page_pointer{GetPointer(addr)}; if (page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block T value; - std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); + std::memcpy(&value, page_pointer, sizeof(T)); return value; } @@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) { return; } - u8* page_pointer{page_table.pointers[addr >> page_bits]}; + u8* page_pointer{GetPointer(addr)}; if (page_pointer) { // NOTE: Avoid adding any extra logic to this fast-path block - std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); + std::memcpy(page_pointer, &data, sizeof(T)); return; } @@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { return {}; } - u8* const page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer != nullptr) { - return page_pointer + (addr & page_mask); + auto& memory = system.Memory(); + + const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + + if (page_addr != 0) { + return memory.GetPointer(page_addr + (addr & page_mask)); } LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { return {}; } - const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer != nullptr) { - return page_pointer + (addr & page_mask); + const auto& memory = system.Memory(); + + const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; + + if (page_addr != 0) { + return memory.GetPointer(page_addr + (addr & page_mask)); } LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); @@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { - const u8* src_ptr{page_table.pointers[page_index] + page_offset}; + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; // Flush must happen on the rasterizer interface, such that memory is always synchronous // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); - std::memcpy(dest_buffer, src_ptr, copy_amount); + rasterizer.FlushRegion(src_addr, copy_amount); + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); break; } default: @@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, std::size_t page_index{src_addr >> page_bits}; std::size_t page_offset{src_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; const u8* page_pointer = page_table.pointers[page_index]; if (page_pointer) { - const u8* src_ptr{page_pointer + page_offset}; - std::memcpy(dest_buffer, src_ptr, copy_amount); + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); } else { std::memset(dest_buffer, 0, copy_amount); } @@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; switch (page_table.attributes[page_index]) { case Common::PageType::Memory: { - u8* dest_ptr{page_table.pointers[page_index] + page_offset}; + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; // Invalidate must happen on the rasterizer interface, such that memory is always // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); - std::memcpy(dest_ptr, src_buffer, copy_amount); + rasterizer.InvalidateRegion(dest_addr, copy_amount); + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); break; } default: @@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t page_index{dest_addr >> page_bits}; std::size_t page_offset{dest_addr & page_mask}; + auto& memory = system.Memory(); + while (remaining_size > 0) { const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; u8* page_pointer = page_table.pointers[page_index]; if (page_pointer) { - u8* dest_ptr{page_pointer + page_offset}; - std::memcpy(dest_ptr, src_buffer, copy_amount); + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); } page_index++; page_offset = 0; @@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, } void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { - std::size_t remaining_size{size}; - std::size_t page_index{src_addr >> page_bits}; - std::size_t page_offset{src_addr & page_mask}; - - while (remaining_size > 0) { - const std::size_t copy_amount{ - std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is copied (even when in asynchronous GPU mode). - const u8* src_ptr{page_table.pointers[page_index] + page_offset}; - rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); - WriteBlock(dest_addr, src_ptr, copy_amount); - break; - } - default: - UNREACHABLE(); - } - - page_index++; - page_offset = 0; - dest_addr += static_cast<VAddr>(copy_amount); - src_addr += static_cast<VAddr>(copy_amount); - remaining_size -= copy_amount; - } + std::vector<u8> tmp_buffer(size); + ReadBlock(src_addr, tmp_buffer.data(), size); + WriteBlock(dest_addr, tmp_buffer.data(), size); } void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { @@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); } +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { + const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; + const std::size_t page = (addr & Memory::PAGE_MASK) + size; + return page <= Memory::PAGE_SIZE; +} + void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, VAddr backing_addr) { LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 073bdb491..0d9468535 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -97,6 +97,11 @@ public: void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); + /** + * IsGranularRange checks if a gpu region can be simply read with a pointer + */ + bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); + private: using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; using VMAHandle = VMAMap::const_iterator; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index e66054ed0..5ea2b01f2 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -98,12 +98,12 @@ public: static_cast<QueryCache&>(*this), VideoCore::QueryType::SamplesPassed}}} {} - void InvalidateRegion(CacheAddr addr, std::size_t size) { + void InvalidateRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; FlushAndRemoveRegion(addr, size); } @@ -117,14 +117,16 @@ public: void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { std::unique_lock lock{mutex}; auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); + const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr_opt); + VAddr cpu_addr = *cpu_addr_opt; - CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); + CachedQuery* query = TryGet(cpu_addr); if (!query) { - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - ASSERT_OR_EXECUTE(cpu_addr, return;); + ASSERT_OR_EXECUTE(cpu_addr_opt, return;); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); - query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); + query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); } query->BindCounter(Stream(type).Current(), timestamp); @@ -173,11 +175,11 @@ protected: private: /// Flushes a memory range to guest memory and removes it from the cache. - void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { + void FlushAndRemoveRegion(VAddr addr, std::size_t size) { const u64 addr_begin = static_cast<u64>(addr); const u64 addr_end = addr_begin + static_cast<u64>(size); const auto in_range = [addr_begin, addr_end](CachedQuery& query) { - const u64 cache_begin = query.GetCacheAddr(); + const u64 cache_begin = query.GetCpuAddr(); const u64 cache_end = cache_begin + query.SizeInBytes(); return cache_begin < addr_end && addr_begin < cache_end; }; @@ -193,7 +195,7 @@ private: if (!in_range(query)) { continue; } - rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); + rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); query.Flush(); } contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), @@ -204,22 +206,21 @@ private: /// Registers the passed parameters as cached and returns a pointer to the stored cached query. CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); - const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; + const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, host_ptr); } /// Tries to a get a cached query. Returns nullptr on failure. - CachedQuery* TryGet(CacheAddr addr) { + CachedQuery* TryGet(VAddr addr) { const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; const auto it = cached_queries.find(page); if (it == std::end(cached_queries)) { return nullptr; } auto& contents = it->second; - const auto found = - std::find_if(std::begin(contents), std::end(contents), - [addr](auto& query) { return query.GetCacheAddr() == addr; }); + const auto found = std::find_if(std::begin(contents), std::end(contents), + [addr](auto& query) { return query.GetCpuAddr() == addr; }); return found != std::end(contents) ? &*found : nullptr; } @@ -323,14 +324,10 @@ public: timestamp = timestamp_; } - VAddr CpuAddr() const noexcept { + VAddr GetCpuAddr() const noexcept { return cpu_addr; } - CacheAddr GetCacheAddr() const noexcept { - return ToCacheAddr(host_ptr); - } - u64 SizeInBytes() const noexcept { return SizeInBytes(timestamp.has_value()); } diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6de1597a2..22987751e 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -18,22 +18,14 @@ class RasterizerCacheObject { public: - explicit RasterizerCacheObject(const u8* host_ptr) - : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} + explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} virtual ~RasterizerCacheObject(); - CacheAddr GetCacheAddr() const { - return cache_addr; + VAddr GetCpuAddr() const { + return cpu_addr; } - const u8* GetHostPtr() const { - return host_ptr; - } - - /// Gets the address of the shader in guest memory, required for cache management - virtual VAddr GetCpuAddr() const = 0; - /// Gets the size of the shader in guest memory, required for cache management virtual std::size_t GetSizeInBytes() const = 0; @@ -68,8 +60,7 @@ private: bool is_registered{}; ///< Whether the object is currently registered with the cache bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing - const u8* host_ptr{}; ///< Pointer to the memory backing this cached region - CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space + VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space }; template <class T> @@ -80,7 +71,7 @@ public: explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} /// Write any cached resources overlapping the specified region back to memory - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -90,7 +81,7 @@ public: } /// Mark the specified region as being invalidated - void InvalidateRegion(CacheAddr addr, u64 size) { + void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; @@ -114,27 +105,20 @@ public: protected: /// Tries to get an object from the cache with the specified cache address - T TryGet(CacheAddr addr) const { + T TryGet(VAddr addr) const { const auto iter = map_cache.find(addr); if (iter != map_cache.end()) return iter->second; return nullptr; } - T TryGet(const void* addr) const { - const auto iter = map_cache.find(ToCacheAddr(addr)); - if (iter != map_cache.end()) - return iter->second; - return nullptr; - } - /// Register an object into the cache virtual void Register(const T& object) { std::lock_guard lock{mutex}; object->SetIsRegistered(true); interval_cache.add({GetInterval(object), ObjectSet{object}}); - map_cache.insert({object->GetCacheAddr(), object}); + map_cache.insert({object->GetCpuAddr(), object}); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); } @@ -144,7 +128,7 @@ protected: object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); - const CacheAddr addr = object->GetCacheAddr(); + const VAddr addr = object->GetCpuAddr(); interval_cache.subtract({GetInterval(object), ObjectSet{object}}); map_cache.erase(addr); } @@ -173,7 +157,7 @@ protected: private: /// Returns a list of cached objects from the specified memory region, ordered by access time - std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { + std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { if (size == 0) { return {}; } @@ -197,13 +181,13 @@ private: } using ObjectSet = std::set<T>; - using ObjectCache = std::unordered_map<CacheAddr, T>; - using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; + using ObjectCache = std::unordered_map<VAddr, T>; + using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; using ObjectInterval = typename IntervalCache::interval_type; static auto GetInterval(const T& object) { - return ObjectInterval::right_open(object->GetCacheAddr(), - object->GetCacheAddr() + object->GetSizeInBytes()); + return ObjectInterval::right_open(object->GetCpuAddr(), + object->GetCpuAddr() + object->GetSizeInBytes()); } ObjectCache map_cache; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1a68e3caa..8ae5b9c4e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -53,14 +53,14 @@ public: virtual void FlushAll() = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - virtual void FlushRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated - virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; /// Notify the rasterizer to send all written commands to the host GPU. virtual void FlushCommands() = 0; diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 5ec99a126..1d85219b6 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -46,7 +46,8 @@ public: /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer /// specific implementation) - virtual void TryPresent(int timeout_ms) = 0; + /// Returns true if a frame was drawn + virtual bool TryPresent(int timeout_ms) = 0; // Getter/setter functions: // ------------------------ diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0375fca17..4eb37a96c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); -CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) - : VideoCommon::BufferBlock{cache_addr, size} { +CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { gl_buffer.Create(); glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); } @@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() { glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); } -Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(cache_addr, size); +Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(cpu_addr, size); } void OGLBufferCache::WriteBarrier() { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c7145443..d94a11252 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf class CachedBufferBlock : public VideoCommon::BufferBlock { public: - explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); + explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); ~CachedBufferBlock(); const GLuint* GetHandle() const { @@ -55,7 +55,7 @@ public: } protected: - Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; void WriteBarrier() override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1a2e2a9f7..c286502ba 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -131,6 +131,31 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin return bindings; } +bool IsASTCSupported() { + static constexpr std::array formats = { + GL_COMPRESSED_RGBA_ASTC_4x4_KHR, GL_COMPRESSED_RGBA_ASTC_5x4_KHR, + GL_COMPRESSED_RGBA_ASTC_5x5_KHR, GL_COMPRESSED_RGBA_ASTC_6x5_KHR, + GL_COMPRESSED_RGBA_ASTC_6x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x5_KHR, + GL_COMPRESSED_RGBA_ASTC_8x6_KHR, GL_COMPRESSED_RGBA_ASTC_8x8_KHR, + GL_COMPRESSED_RGBA_ASTC_10x5_KHR, GL_COMPRESSED_RGBA_ASTC_10x6_KHR, + GL_COMPRESSED_RGBA_ASTC_10x8_KHR, GL_COMPRESSED_RGBA_ASTC_10x10_KHR, + GL_COMPRESSED_RGBA_ASTC_12x10_KHR, GL_COMPRESSED_RGBA_ASTC_12x12_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR, + GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR, GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR, + }; + return std::find_if_not(formats.begin(), formats.end(), [](GLenum format) { + GLint supported; + glGetInternalformativ(GL_TEXTURE_2D, format, GL_INTERNALFORMAT_SUPPORTED, 1, + &supported); + return supported == GL_TRUE; + }) == formats.end(); +} + } // Anonymous namespace Device::Device() : base_bindings{BuildBaseBindings()} { @@ -152,6 +177,7 @@ Device::Device() : base_bindings{BuildBaseBindings()} { has_shader_ballot = GLAD_GL_ARB_shader_ballot; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); + has_astc = IsASTCSupported(); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index d73b099d0..a55050cb5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -64,6 +64,10 @@ public: return has_image_load_formatted; } + bool HasASTC() const { + return has_astc; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -97,6 +101,7 @@ private: bool has_shader_ballot{}; bool has_vertex_viewport_layer{}; bool has_image_load_formatted{}; + bool has_astc{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 826eee7df..368f399df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -386,11 +386,14 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using texture_cache.GuardRenderTargets(true); View color_surface; if (using_color_fb) { - color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false); + const std::size_t index = regs.clear_buffers.RT; + color_surface = texture_cache.GetColorBufferSurface(index, true); + texture_cache.MarkColorBufferInUse(index); } View depth_surface; if (using_depth_fb || using_stencil_fb) { - depth_surface = texture_cache.GetDepthBufferSurface(false); + depth_surface = texture_cache.GetDepthBufferSurface(true); + texture_cache.MarkDepthBufferInUse(); } texture_cache.GuardRenderTargets(false); @@ -444,6 +447,7 @@ void RasterizerOpenGL::Clear() { } SyncRasterizeEnable(); + SyncStencilTestState(); if (regs.clear_flags.scissor) { SyncScissorTest(); @@ -652,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, void RasterizerOpenGL::FlushAll() {} -void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { + if (addr == 0 || size == 0) { return; } texture_cache.FlushRegion(addr, size); @@ -662,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { query_cache.FlushRegion(addr, size); } -void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - if (!addr || !size) { + if (addr == 0 || size == 0) { return; } texture_cache.InvalidateRegion(addr, size); @@ -673,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { query_cache.InvalidateRegion(addr, size); } -void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { if (Settings::values.use_accurate_gpu_emulation) { FlushRegion(addr, size); } @@ -712,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, MICROPROFILE_SCOPE(OpenGL_CacheManagement); - const auto surface{ - texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; + const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; if (!surface) { return {}; } @@ -1052,12 +1055,8 @@ void RasterizerOpenGL::SyncStencilTestState() { flags[Dirty::StencilTest] = false; const auto& regs = gpu.regs; - if (!regs.stencil_enable) { - glDisable(GL_STENCIL_TEST); - return; - } + oglEnable(GL_STENCIL_TEST, regs.stencil_enable); - glEnable(GL_STENCIL_TEST); glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), regs.stencil_front_func_ref, regs.stencil_front_func_mask); glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2d3be2437..212dad852 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -65,9 +65,9 @@ public: void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index e3d31c3eb..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, +CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, std::shared_ptr<OGLProgram> program) - : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, - cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} + : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, + size_in_bytes{size_in_bytes}, program{std::move(program)} {} CachedShader::~CachedShader() = default; @@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(registry), - MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>(new CachedShader( + params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { @@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog entry.bindless_samplers = registry->GetBindlessSamplers(); params.disk_cache.SaveEntry(std::move(entry)); - return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, - size_in_bytes, std::move(registry), - MakeEntries(ir), std::move(program))); + return std::shared_ptr<CachedShader>(new CachedShader( + params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, const PrecompiledShader& precompiled_shader, std::size_t size_in_bytes) { - return std::shared_ptr<CachedShader>(new CachedShader( - params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, - precompiled_shader.entries, precompiled_shader.program)); + return std::shared_ptr<CachedShader>( + new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, + precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -327,8 +325,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, std::size_t end) { - context->MakeCurrent(); - SCOPE_EXIT({ return context->DoneCurrent(); }); + const auto scope = context->Acquire(); for (std::size_t i = begin; i < end; ++i) { if (stop_loading) { @@ -450,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const GPUVAddr address{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto host_ptr{memory_manager.GetPointer(address)}; - Shader shader{TryGet(host_ptr)}; + const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; + Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; if (shader) { return last_shaders[static_cast<std::size_t>(program)] = shader; } + const auto host_ptr{memory_manager.GetPointer(address)}; + // No shader found - create a new one ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; ProgramCode code_b; @@ -466,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; - const ShaderParameters params{system, disk_cache, device, - cpu_addr, host_ptr, unique_identifier}; + + const ShaderParameters params{system, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { @@ -485,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { auto& memory_manager{system.GPU().MemoryManager()}; - const auto host_ptr{memory_manager.GetPointer(code_addr)}; - auto kernel = TryGet(host_ptr); + const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; + + auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (kernel) { return kernel; } + const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{system, disk_cache, device, - cpu_addr, host_ptr, unique_identifier}; + + const ShaderParameters params{system, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; const auto found = runtime_cache.find(unique_identifier); if (found == runtime_cache.end()) { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 4935019fc..c836df5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -65,11 +65,6 @@ public: /// Gets the GL program handle for the shader GLuint GetHandle() const; - /// Returns the guest CPU address of the shader - VAddr GetCpuAddr() const override { - return cpu_addr; - } - /// Returns the size in bytes of the shader std::size_t GetSizeInBytes() const override { return size_in_bytes; @@ -90,13 +85,12 @@ public: std::size_t size_in_bytes); private: - explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, + explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, std::shared_ptr<OGLProgram> program); std::shared_ptr<VideoCommon::Shader::Registry> registry; ShaderEntries entries; - VAddr cpu_addr = 0; std::size_t size_in_bytes = 0; std::shared_ptr<OGLProgram> program; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 8aa4a7ac9..160ae4340 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -31,11 +31,11 @@ namespace { using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; -using Tegra::Shader::AttributeUse; using Tegra::Shader::Header; using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; @@ -702,20 +702,19 @@ private: code.AddNewLine(); } - std::string GetInputFlags(AttributeUse attribute) { + const char* GetInputFlags(PixelImap attribute) { switch (attribute) { - case AttributeUse::Perspective: - // Default, Smooth - return {}; - case AttributeUse::Constant: - return "flat "; - case AttributeUse::ScreenLinear: - return "noperspective "; - default: - case AttributeUse::Unused: - UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute)); - return {}; + case PixelImap::Perspective: + return "smooth"; + case PixelImap::Constant: + return "flat"; + case PixelImap::ScreenLinear: + return "noperspective"; + case PixelImap::Unused: + break; } + UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<int>(attribute)); + return {}; } void DeclareInputAttributes() { @@ -749,8 +748,8 @@ private: std::string suffix; if (stage == ShaderType::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(location)}; - if (skip_unused && input_mode == AttributeUse::Unused) { + const auto input_mode{header.ps.GetPixelImap(location)}; + if (input_mode == PixelImap::Unused) { return; } suffix = GetInputFlags(input_mode); @@ -927,7 +926,7 @@ private: const u32 address{generic_base + index * generic_stride + element * element_stride}; const bool declared = stage != ShaderType::Fragment || - header.ps.GetAttributeUse(index) != AttributeUse::Unused; + header.ps.GetPixelImap(index) != PixelImap::Unused; const std::string value = declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f"; code.AddLine("case 0x{:X}U: return {};", address, value); @@ -1142,8 +1141,7 @@ private: GetSwizzle(element)), Type::Float}; case ShaderType::Fragment: - return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)), - Type::Float}; + return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; default: UNREACHABLE(); } @@ -2114,6 +2112,10 @@ private: template <const std::string_view& opname, Type type> Expression Atomic(Operation operation) { + if ((opname == Func::Min || opname == Func::Max) && type == Type::Int) { + UNIMPLEMENTED_MSG("Unimplemented Min & Max for atomic operations"); + return {}; + } return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), Visit(operation[1]).As(type)), type}; @@ -2307,6 +2309,8 @@ private: ~Func() = delete; static constexpr std::string_view Add = "Add"; + static constexpr std::string_view Min = "Min"; + static constexpr std::string_view Max = "Max"; static constexpr std::string_view And = "And"; static constexpr std::string_view Or = "Or"; static constexpr std::string_view Xor = "Xor"; @@ -2457,7 +2461,21 @@ private: &GLSLDecompiler::AtomicImage<Func::Xor>, &GLSLDecompiler::AtomicImage<Func::Exchange>, + &GLSLDecompiler::Atomic<Func::Exchange, Type::Uint>, &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Min, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Max, Type::Uint>, + &GLSLDecompiler::Atomic<Func::And, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Or, Type::Uint>, + &GLSLDecompiler::Atomic<Func::Xor, Type::Uint>, + + &GLSLDecompiler::Atomic<Func::Exchange, Type::Int>, + &GLSLDecompiler::Atomic<Func::Add, Type::Int>, + &GLSLDecompiler::Atomic<Func::Min, Type::Int>, + &GLSLDecompiler::Atomic<Func::Max, Type::Int>, + &GLSLDecompiler::Atomic<Func::And, Type::Int>, + &GLSLDecompiler::Atomic<Func::Or, Type::Int>, + &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index f424e3000..36590a6d0 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -24,7 +24,6 @@ using Tegra::Texture::SwizzleSource; using VideoCore::MortonSwizzleMode; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -37,102 +36,100 @@ namespace { struct FormatTuple { GLint internal_format; - GLenum format; - GLenum type; - bool compressed; + GLenum format = GL_NONE; + GLenum type = GL_NONE; }; constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U - {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U - {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN1 - {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_SF16 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT, false}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT, false}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT, false}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT, false}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT, false}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT, false}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT, false}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U - {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI - {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI - {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false}, // BGRA8 + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // ABGR8U + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // ABGR8S + {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // ABGR8UI + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5U + {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10U + {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5U + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8U + {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8UI + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // RGBA16U + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // RGBA16S + {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // RGBA16UI + {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // R11FG11FB10F + {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // RGBA32UI + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // DXT1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // DXT23 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // DXT45 + {GL_COMPRESSED_RED_RGTC1}, // DXN1 + {GL_COMPRESSED_RG_RGTC2}, // DXN2UNORM + {GL_COMPRESSED_SIGNED_RG_RGTC2}, // DXN2SNORM + {GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7U + {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UF16 + {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SF16 + {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16U + {GL_R16_SNORM, GL_RED, GL_SHORT}, // R16S + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16UI + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I + {GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // RG16UI + {GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // RG16I + {GL_RG16_SNORM, GL_RG, GL_SHORT}, // RG16S + {GL_RGB32F, GL_RGB, GL_FLOAT}, // RGB32F + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // RGBA8_SRGB + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8U + {GL_RG8_SNORM, GL_RG, GL_BYTE}, // RG8S + {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // RG32UI + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // RGBX16F + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32UI + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I + {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8 + {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5 + {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4 + {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // BC7U_SRGB - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false}, // R4G4B4A4U - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_4X4_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X5_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X8_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X6_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_10X10_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_12X12_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X6_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_6X5_SRGB - {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // DXT1_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // DXT23_SRGB + {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // DXT45_SRGB + {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7U_SRGB + {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // R4G4B4A4U + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB + {GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB + {GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB + {GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB + {GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5 + {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB + {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9F // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // Z32F + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // Z16 // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // Z24S8 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8Z24 + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // Z32FS8 }}; const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); - const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]}; - return format; + return tex_format_tuples[static_cast<std::size_t>(pixel_format)]; } GLenum GetTextureTarget(const SurfaceTarget& target) { @@ -242,13 +239,20 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte } // Anonymous namespace -CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) - : VideoCommon::SurfaceBase<View>(gpu_addr, params) { - const auto& tuple{GetFormatTuple(params.pixel_format)}; - internal_format = tuple.internal_format; - format = tuple.format; - type = tuple.type; - is_compressed = tuple.compressed; +CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported) + : VideoCommon::SurfaceBase<View>(gpu_addr, params, is_astc_supported) { + if (is_converted) { + internal_format = params.srgb_conversion ? GL_SRGB8_ALPHA8 : GL_RGBA8; + format = GL_RGBA; + type = GL_UNSIGNED_BYTE; + } else { + const auto& tuple{GetFormatTuple(params.pixel_format)}; + internal_format = tuple.internal_format; + format = tuple.format; + type = tuple.type; + is_compressed = params.IsCompressed(); + } target = GetTextureTarget(params.target); texture = CreateTexture(params, target, internal_format, texture_buffer); DecorateSurfaceName(); @@ -264,7 +268,7 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { if (params.IsBuffer()) { glGetNamedBufferSubData(texture_buffer.handle, 0, - static_cast<GLsizeiptr>(params.GetHostSizeInBytes()), + static_cast<GLsizeiptr>(params.GetHostSizeInBytes(false)), staging_buffer.data()); return; } @@ -272,9 +276,10 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); }); for (u32 level = 0; level < params.emulated_levels; ++level) { - glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); + glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); + u8* const mip_data = staging_buffer.data() + mip_offset; const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); if (is_compressed) { @@ -294,14 +299,10 @@ void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { } void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) { - glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); + glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level, is_converted))); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); - auto compression_type = params.GetCompressionType(); - - const std::size_t mip_offset = compression_type == SurfaceCompression::Converted - ? params.GetConvertedMipmapOffset(level) - : params.GetHostMipmapLevelOffset(level); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); const u8* buffer{staging_buffer.data() + mip_offset}; if (is_compressed) { const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))}; @@ -482,7 +483,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, const Device& device, StateTracker& state_tracker) - : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { + : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} { src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -490,7 +491,7 @@ TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, TextureCacheOpenGL::~TextureCacheOpenGL() = default; Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(gpu_addr, params); + return std::make_shared<CachedSurface>(gpu_addr, params, is_astc_supported); } void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface, @@ -596,7 +597,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); - if (source_format.compressed) { + if (src_surface->IsCompressed()) { glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size), nullptr); } else { @@ -610,7 +611,7 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) const GLsizei width = static_cast<GLsizei>(dst_params.width); const GLsizei height = static_cast<GLsizei>(dst_params.height); const GLsizei depth = static_cast<GLsizei>(dst_params.depth); - if (dest_format.compressed) { + if (dst_surface->IsCompressed()) { LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!"); UNREACHABLE(); } else { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 6658c6ffd..02d9981a1 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -37,7 +37,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> { friend CachedSurfaceView; public: - explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params); + explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params, bool is_astc_supported); ~CachedSurface(); void UploadTexture(const std::vector<u8>& staging_buffer) override; @@ -51,6 +51,10 @@ public: return texture.handle; } + bool IsCompressed() const { + return is_compressed; + } + protected: void DecorateSurfaceName() override; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index fca5e3ec0..f1a28cc21 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -30,8 +30,6 @@ namespace OpenGL { namespace { -// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have -// to wait on available presentation frames. constexpr std::size_t SWAP_CHAIN_SIZE = 3; struct Frame { @@ -214,7 +212,7 @@ public: std::deque<Frame*> present_queue; Frame* previous_frame{}; - FrameMailbox() : has_debug_tool{HasDebugTool()} { + FrameMailbox() { for (auto& frame : swap_chain) { free_queue.push(&frame); } @@ -285,13 +283,9 @@ public: std::unique_lock lock{swap_chain_lock}; present_queue.push_front(frame); present_cv.notify_one(); - - DebugNotifyNextFrame(); } Frame* TryGetPresentFrame(int timeout_ms) { - DebugWaitForNextFrame(); - std::unique_lock lock{swap_chain_lock}; // wait for new entries in the present_queue present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), @@ -317,38 +311,12 @@ public: previous_frame = frame; return frame; } - -private: - std::mutex debug_synch_mutex; - std::condition_variable debug_synch_condition; - std::atomic_int frame_for_debug{}; - const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step - - /// Signal that a new frame is available (called from GPU thread) - void DebugNotifyNextFrame() { - if (!has_debug_tool) { - return; - } - frame_for_debug++; - std::lock_guard lock{debug_synch_mutex}; - debug_synch_condition.notify_one(); - } - - /// Wait for a new frame to be available (called from presentation thread) - void DebugWaitForNextFrame() { - if (!has_debug_tool) { - return; - } - const int last_frame = frame_for_debug; - std::unique_lock lock{debug_synch_mutex}; - debug_synch_condition.wait(lock, - [this, last_frame] { return frame_for_debug > last_frame; }); - } }; -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) +RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, + Core::Frontend::GraphicsContext& context) : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, - frame_mailbox{std::make_unique<FrameMailbox>()} {} + frame_mailbox{}, context{context}, has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; @@ -356,8 +324,6 @@ MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 12 MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - render_window.PollEvents(); - if (!framebuffer) { return; } @@ -413,6 +379,13 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { m_current_frame++; rasterizer->TickFrame(); } + + render_window.PollEvents(); + if (has_debug_tool) { + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + Present(0); + context.SwapBuffers(); + } } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { @@ -480,6 +453,8 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color } void RendererOpenGL::InitOpenGLObjects() { + frame_mailbox = std::make_unique<FrameMailbox>(); + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); @@ -692,12 +667,21 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } -void RendererOpenGL::TryPresent(int timeout_ms) { +bool RendererOpenGL::TryPresent(int timeout_ms) { + if (has_debug_tool) { + LOG_DEBUG(Render_OpenGL, + "Skipping presentation because we are presenting on the main context"); + return false; + } + return Present(timeout_ms); +} + +bool RendererOpenGL::Present(int timeout_ms) { const auto& layout = render_window.GetFramebufferLayout(); auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); if (!frame) { LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); - return; + return false; } // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a @@ -725,6 +709,7 @@ void RendererOpenGL::TryPresent(int timeout_ms) { glFlush(); glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + return true; } void RendererOpenGL::RenderScreenshot() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 33073ce5b..50b647661 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -55,13 +55,14 @@ class FrameMailbox; class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); + explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, + Core::Frontend::GraphicsContext& context); ~RendererOpenGL() override; bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void TryPresent(int timeout_ms) override; + bool TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -89,8 +90,11 @@ private: void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + bool Present(int timeout_ms); + Core::Frontend::EmuWindow& emu_window; Core::System& system; + Core::Frontend::GraphicsContext& context; StateTracker state_tracker{system}; @@ -115,6 +119,8 @@ private: /// Frame presentation mailbox std::unique_ptr<FrameMailbox> frame_mailbox; + + bool has_debug_tool = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/declarations.h b/src/video_core/renderer_vulkan/declarations.h index 323bf6b39..89a035ca4 100644 --- a/src/video_core/renderer_vulkan/declarations.h +++ b/src/video_core/renderer_vulkan/declarations.h @@ -39,6 +39,7 @@ using UniqueFence = UniqueHandle<vk::Fence>; using UniqueFramebuffer = UniqueHandle<vk::Framebuffer>; using UniqueImage = UniqueHandle<vk::Image>; using UniqueImageView = UniqueHandle<vk::ImageView>; +using UniqueInstance = UniqueHandle<vk::Instance>; using UniqueIndirectCommandsLayoutNVX = UniqueHandle<vk::IndirectCommandsLayoutNVX>; using UniqueObjectTableNVX = UniqueHandle<vk::ObjectTableNVX>; using UniquePipeline = UniqueHandle<vk::Pipeline>; @@ -50,6 +51,7 @@ using UniqueSampler = UniqueHandle<vk::Sampler>; using UniqueSamplerYcbcrConversion = UniqueHandle<vk::SamplerYcbcrConversion>; using UniqueSemaphore = UniqueHandle<vk::Semaphore>; using UniqueShaderModule = UniqueHandle<vk::ShaderModule>; +using UniqueSurfaceKHR = UniqueHandle<vk::SurfaceKHR>; using UniqueSwapchainKHR = UniqueHandle<vk::SwapchainKHR>; using UniqueValidationCacheEXT = UniqueHandle<vk::ValidationCacheEXT>; using UniqueDebugReportCallbackEXT = UniqueHandle<vk::DebugReportCallbackEXT>; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 42bb01418..9cdb4b627 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -2,13 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> +#include <cstring> #include <memory> #include <optional> +#include <string> #include <vector> #include <fmt/format.h> #include "common/assert.h" +#include "common/dynamic_library.h" #include "common/logging/log.h" #include "common/telemetry.h" #include "core/core.h" @@ -30,15 +35,30 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +// Include these late to avoid changing Vulkan-Hpp's dynamic dispatcher size +#ifdef _WIN32 +#include <windows.h> +// ensure include order +#include <vulkan/vulkan_win32.h> +#endif + +#ifdef __linux__ +#include <X11/Xlib.h> +#include <vulkan/vulkan_wayland.h> +#include <vulkan/vulkan_xlib.h> +#endif + namespace Vulkan { namespace { +using Core::Frontend::WindowSystemType; + VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, [[maybe_unused]] void* user_data) { - const vk::DebugUtilsMessageSeverityFlagBitsEXT severity{severity_}; + const auto severity{static_cast<vk::DebugUtilsMessageSeverityFlagBitsEXT>(severity_)}; const char* message{data->pMessage}; if (severity & vk::DebugUtilsMessageSeverityFlagBitsEXT::eError) { @@ -53,6 +73,110 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity_, return VK_FALSE; } +Common::DynamicLibrary OpenVulkanLibrary() { + Common::DynamicLibrary library; +#ifdef __APPLE__ + // Check if a path to a specific Vulkan library has been specified. + char* libvulkan_env = getenv("LIBVULKAN_PATH"); + if (!libvulkan_env || !library.Open(libvulkan_env)) { + // Use the libvulkan.dylib from the application bundle. + std::string filename = File::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + library.Open(filename.c_str()); + } +#else + std::string filename = Common::DynamicLibrary::GetVersionedFilename("vulkan", 1); + if (!library.Open(filename.c_str())) { + // Android devices may not have libvulkan.so.1, only libvulkan.so. + filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); + library.Open(filename.c_str()); + } +#endif + return library; +} + +UniqueInstance CreateInstance(Common::DynamicLibrary& library, vk::DispatchLoaderDynamic& dld, + WindowSystemType window_type = WindowSystemType::Headless, + bool enable_layers = false) { + if (!library.IsOpen()) { + LOG_ERROR(Render_Vulkan, "Vulkan library not available"); + return UniqueInstance{}; + } + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + if (!library.GetSymbol("vkGetInstanceProcAddr", &vkGetInstanceProcAddr)) { + LOG_ERROR(Render_Vulkan, "vkGetInstanceProcAddr not present in Vulkan"); + return UniqueInstance{}; + } + dld.init(vkGetInstanceProcAddr); + + std::vector<const char*> extensions; + extensions.reserve(4); + switch (window_type) { + case Core::Frontend::WindowSystemType::Headless: + break; +#ifdef _WIN32 + case Core::Frontend::WindowSystemType::Windows: + extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + break; +#endif +#ifdef __linux__ + case Core::Frontend::WindowSystemType::X11: + extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + break; + case Core::Frontend::WindowSystemType::Wayland: + extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); + break; +#endif + default: + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + break; + } + if (window_type != Core::Frontend::WindowSystemType::Headless) { + extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + } + if (enable_layers) { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + u32 num_properties; + if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, nullptr, dld) != + vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to query number of extension properties"); + return UniqueInstance{}; + } + std::vector<vk::ExtensionProperties> properties(num_properties); + if (vk::enumerateInstanceExtensionProperties(nullptr, &num_properties, properties.data(), + dld) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); + return UniqueInstance{}; + } + + for (const char* extension : extensions) { + const auto it = + std::find_if(properties.begin(), properties.end(), [extension](const auto& prop) { + return !std::strcmp(extension, prop.extensionName); + }); + if (it == properties.end()) { + LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); + return UniqueInstance{}; + } + } + + const vk::ApplicationInfo application_info("yuzu Emulator", VK_MAKE_VERSION(0, 1, 0), + "yuzu Emulator", VK_MAKE_VERSION(0, 1, 0), + VK_API_VERSION_1_1); + const std::array layers = {"VK_LAYER_LUNARG_standard_validation"}; + const vk::InstanceCreateInfo instance_ci( + {}, &application_info, enable_layers ? static_cast<u32>(layers.size()) : 0, layers.data(), + static_cast<u32>(extensions.size()), extensions.data()); + vk::Instance unsafe_instance; + if (vk::createInstance(&instance_ci, nullptr, &unsafe_instance, dld) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); + return UniqueInstance{}; + } + dld.init(unsafe_instance); + return UniqueInstance(unsafe_instance, {nullptr, dld}); +} + std::string GetReadableVersion(u32 version) { return fmt::format("{}.{}.{}", VK_VERSION_MAJOR(version), VK_VERSION_MINOR(version), VK_VERSION_PATCH(version)); @@ -141,32 +265,18 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); } -void RendererVulkan::TryPresent(int /*timeout_ms*/) { +bool RendererVulkan::TryPresent(int /*timeout_ms*/) { // TODO (bunnei): ImplementMe + return true; } bool RendererVulkan::Init() { - PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr{}; - render_window.RetrieveVulkanHandlers(&vkGetInstanceProcAddr, &instance, &surface); - const vk::DispatchLoaderDynamic dldi(instance, vkGetInstanceProcAddr); - - std::optional<vk::DebugUtilsMessengerEXT> callback; - if (Settings::values.renderer_debug && dldi.vkCreateDebugUtilsMessengerEXT) { - callback = CreateDebugCallback(dldi); - if (!callback) { - return false; - } - } - - if (!PickDevices(dldi)) { - if (callback) { - instance.destroy(*callback, nullptr, dldi); - } + library = OpenVulkanLibrary(); + instance = CreateInstance(library, dld, render_window.GetWindowInfo().type, + Settings::values.renderer_debug); + if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) { return false; } - debug_callback = UniqueDebugUtilsMessengerEXT( - *callback, vk::ObjectDestroy<vk::Instance, vk::DispatchLoaderDynamic>( - instance, nullptr, device->GetDispatchLoader())); Report(); @@ -175,7 +285,7 @@ bool RendererVulkan::Init() { resource_manager = std::make_unique<VKResourceManager>(*device); const auto& framebuffer = render_window.GetFramebufferLayout(); - swapchain = std::make_unique<VKSwapchain>(surface, *device); + swapchain = std::make_unique<VKSwapchain>(*surface, *device); swapchain->Create(framebuffer.width, framebuffer.height, false); state_tracker = std::make_unique<StateTracker>(system); @@ -212,8 +322,10 @@ void RendererVulkan::ShutDown() { device.reset(); } -std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( - const vk::DispatchLoaderDynamic& dldi) { +bool RendererVulkan::CreateDebugCallback() { + if (!Settings::values.renderer_debug) { + return true; + } const vk::DebugUtilsMessengerCreateInfoEXT callback_ci( {}, vk::DebugUtilsMessageSeverityFlagBitsEXT::eError | @@ -224,32 +336,88 @@ std::optional<vk::DebugUtilsMessengerEXT> RendererVulkan::CreateDebugCallback( vk::DebugUtilsMessageTypeFlagBitsEXT::eValidation | vk::DebugUtilsMessageTypeFlagBitsEXT::ePerformance, &DebugCallback, nullptr); - vk::DebugUtilsMessengerEXT callback; - if (instance.createDebugUtilsMessengerEXT(&callback_ci, nullptr, &callback, dldi) != + vk::DebugUtilsMessengerEXT unsafe_callback; + if (instance->createDebugUtilsMessengerEXT(&callback_ci, nullptr, &unsafe_callback, dld) != vk::Result::eSuccess) { LOG_ERROR(Render_Vulkan, "Failed to create debug callback"); - return {}; + return false; + } + debug_callback = UniqueDebugUtilsMessengerEXT(unsafe_callback, {*instance, nullptr, dld}); + return true; +} + +bool RendererVulkan::CreateSurface() { + [[maybe_unused]] const auto& window_info = render_window.GetWindowInfo(); + VkSurfaceKHR unsafe_surface = nullptr; + +#ifdef _WIN32 + if (window_info.type == Core::Frontend::WindowSystemType::Windows) { + const HWND hWnd = static_cast<HWND>(window_info.render_surface); + const VkWin32SurfaceCreateInfoKHR win32_ci{VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + nullptr, 0, nullptr, hWnd}; + const auto vkCreateWin32SurfaceKHR = reinterpret_cast<PFN_vkCreateWin32SurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWin32SurfaceKHR")); + if (!vkCreateWin32SurfaceKHR || vkCreateWin32SurfaceKHR(instance.get(), &win32_ci, nullptr, + &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface"); + return false; + } + } +#endif +#ifdef __linux__ + if (window_info.type == Core::Frontend::WindowSystemType::X11) { + const VkXlibSurfaceCreateInfoKHR xlib_ci{ + VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<Display*>(window_info.display_connection), + reinterpret_cast<Window>(window_info.render_surface)}; + const auto vkCreateXlibSurfaceKHR = reinterpret_cast<PFN_vkCreateXlibSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateXlibSurfaceKHR")); + if (!vkCreateXlibSurfaceKHR || vkCreateXlibSurfaceKHR(instance.get(), &xlib_ci, nullptr, + &unsafe_surface) != VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface"); + return false; + } + } + if (window_info.type == Core::Frontend::WindowSystemType::Wayland) { + const VkWaylandSurfaceCreateInfoKHR wayland_ci{ + VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, nullptr, 0, + static_cast<wl_display*>(window_info.display_connection), + static_cast<wl_surface*>(window_info.render_surface)}; + const auto vkCreateWaylandSurfaceKHR = reinterpret_cast<PFN_vkCreateWaylandSurfaceKHR>( + dld.vkGetInstanceProcAddr(*instance, "vkCreateWaylandSurfaceKHR")); + if (!vkCreateWaylandSurfaceKHR || + vkCreateWaylandSurfaceKHR(instance.get(), &wayland_ci, nullptr, &unsafe_surface) != + VK_SUCCESS) { + LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface"); + return false; + } + } +#endif + if (!unsafe_surface) { + LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform"); + return false; } - return callback; + + surface = UniqueSurfaceKHR(unsafe_surface, {*instance, nullptr, dld}); + return true; } -bool RendererVulkan::PickDevices(const vk::DispatchLoaderDynamic& dldi) { - const auto devices = instance.enumeratePhysicalDevices(dldi); +bool RendererVulkan::PickDevices() { + const auto devices = instance->enumeratePhysicalDevices(dld); - // TODO(Rodrigo): Choose device from config file const s32 device_index = Settings::values.vulkan_device; if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); return false; } - const vk::PhysicalDevice physical_device = devices[device_index]; + const vk::PhysicalDevice physical_device = devices[static_cast<std::size_t>(device_index)]; - if (!VKDevice::IsSuitable(dldi, physical_device, surface)) { + if (!VKDevice::IsSuitable(physical_device, *surface, dld)) { return false; } - device = std::make_unique<VKDevice>(dldi, physical_device, surface); - return device->Create(dldi, instance); + device = std::make_unique<VKDevice>(dld, physical_device, *surface); + return device->Create(*instance); } void RendererVulkan::Report() const { @@ -275,4 +443,33 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } +std::vector<std::string> RendererVulkan::EnumerateDevices() { + // Avoid putting DispatchLoaderDynamic, it's too large + auto dld_memory = std::make_unique<vk::DispatchLoaderDynamic>(); + auto& dld = *dld_memory; + + Common::DynamicLibrary library = OpenVulkanLibrary(); + UniqueInstance instance = CreateInstance(library, dld); + if (!instance) { + return {}; + } + + u32 num_devices; + if (instance->enumeratePhysicalDevices(&num_devices, nullptr, dld) != vk::Result::eSuccess) { + return {}; + } + std::vector<vk::PhysicalDevice> devices(num_devices); + if (instance->enumeratePhysicalDevices(&num_devices, devices.data(), dld) != + vk::Result::eSuccess) { + return {}; + } + + std::vector<std::string> names; + names.reserve(num_devices); + for (auto& device : devices) { + names.push_back(device.getProperties(dld).deviceName); + } + return names; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 3da08d2e4..42e253de5 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -6,8 +6,11 @@ #include <memory> #include <optional> +#include <string> #include <vector> +#include "common/dynamic_library.h" + #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/declarations.h" @@ -42,20 +45,26 @@ public: bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - void TryPresent(int timeout_ms) override; + bool TryPresent(int timeout_ms) override; + + static std::vector<std::string> EnumerateDevices(); private: - std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( - const vk::DispatchLoaderDynamic& dldi); + bool CreateDebugCallback(); - bool PickDevices(const vk::DispatchLoaderDynamic& dldi); + bool CreateSurface(); + + bool PickDevices(); void Report() const; Core::System& system; - vk::Instance instance; - vk::SurfaceKHR surface; + Common::DynamicLibrary library; + vk::DispatchLoaderDynamic dld; + + UniqueInstance instance; + UniqueSurfaceKHR surface; VKScreenInfo screen_info; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1ba544943..326d74f29 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { } // Anonymous namespace CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - CacheAddr cache_addr, std::size_t size) - : VideoCommon::BufferBlock{cache_addr, size} { + VAddr cpu_addr, std::size_t size) + : VideoCommon::BufferBlock{cpu_addr, size} { const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, @@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S VKBufferCache::~VKBufferCache() = default; -Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { - return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); +Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { + return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); } const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3f38eed0c..508214618 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -30,7 +30,7 @@ class VKScheduler; class CachedBufferBlock final : public VideoCommon::BufferBlock { public: explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, - CacheAddr cache_addr, std::size_t size); + VAddr cpu_addr, std::size_t size); ~CachedBufferBlock(); const vk::Buffer* GetHandle() const { @@ -55,7 +55,7 @@ public: protected: void WriteBarrier() override {} - Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; const vk::Buffer* ToHandle(const Buffer& buffer) override; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 28d2fbc4f..6f4ae9132 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -10,6 +10,7 @@ #include <string_view> #include <thread> #include <vector> + #include "common/assert.h" #include "core/settings.h" #include "video_core/renderer_vulkan/declarations.h" @@ -35,20 +36,20 @@ void SetNext(void**& next, T& data) { } template <typename T> -T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { +T GetFeatures(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) { vk::PhysicalDeviceFeatures2 features; T extension_features; features.pNext = &extension_features; - physical.getFeatures2(&features, dldi); + physical.getFeatures2(&features, dld); return extension_features; } template <typename T> -T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dldi) { +T GetProperties(vk::PhysicalDevice physical, const vk::DispatchLoaderDynamic& dld) { vk::PhysicalDeviceProperties2 properties; T extension_properties; properties.pNext = &extension_properties; - physical.getProperties2(&properties, dldi); + physical.getProperties2(&properties, dld); return extension_properties; } @@ -78,19 +79,19 @@ vk::FormatFeatureFlags GetFormatFeatures(vk::FormatProperties properties, Format } // Anonymous namespace -VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, +VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical, vk::SurfaceKHR surface) - : physical{physical}, properties{physical.getProperties(dldi)}, - format_properties{GetFormatProperties(dldi, physical)} { - SetupFamilies(dldi, surface); - SetupFeatures(dldi); + : dld{dld}, physical{physical}, properties{physical.getProperties(dld)}, + format_properties{GetFormatProperties(dld, physical)} { + SetupFamilies(surface); + SetupFeatures(); } VKDevice::~VKDevice() = default; -bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { +bool VKDevice::Create(vk::Instance instance) { const auto queue_cis = GetDeviceQueueCreateInfos(); - const std::vector extensions = LoadExtensions(dldi); + const std::vector extensions = LoadExtensions(); vk::PhysicalDeviceFeatures2 features2; void** next = &features2.pNext; @@ -165,15 +166,13 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan nullptr); device_ci.pNext = &features2; - vk::Device dummy_logical; - if (physical.createDevice(&device_ci, nullptr, &dummy_logical, dldi) != vk::Result::eSuccess) { + vk::Device unsafe_logical; + if (physical.createDevice(&device_ci, nullptr, &unsafe_logical, dld) != vk::Result::eSuccess) { LOG_CRITICAL(Render_Vulkan, "Logical device failed to be created!"); return false; } - - dld.init(instance, dldi.vkGetInstanceProcAddr, dummy_logical, dldi.vkGetDeviceProcAddr); - logical = UniqueDevice( - dummy_logical, vk::ObjectDestroy<vk::NoParent, vk::DispatchLoaderDynamic>(nullptr, dld)); + dld.init(instance, dld.vkGetInstanceProcAddr, unsafe_logical); + logical = UniqueDevice(unsafe_logical, {nullptr, dld}); CollectTelemetryParameters(); @@ -235,20 +234,23 @@ void VKDevice::ReportLoss() const { // *(VKGraphicsPipeline*)data[0] } -bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, - const vk::DispatchLoaderDynamic& dldi) const { +bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const { // Disable for now to avoid converting ASTC twice. - return false; static constexpr std::array astc_formats = { - vk::Format::eAstc4x4SrgbBlock, vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, + vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock, vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, - vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, - vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock, + vk::Format::eAstc8x5UnormBlock, vk::Format::eAstc8x5SrgbBlock, vk::Format::eAstc8x6UnormBlock, vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc6x5UnormBlock, vk::Format::eAstc6x5SrgbBlock}; + vk::Format::eAstc8x8UnormBlock, vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc10x5UnormBlock, vk::Format::eAstc10x5SrgbBlock, + vk::Format::eAstc10x6UnormBlock, vk::Format::eAstc10x6SrgbBlock, + vk::Format::eAstc10x8UnormBlock, vk::Format::eAstc10x8SrgbBlock, + vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, + vk::Format::eAstc12x10UnormBlock, vk::Format::eAstc12x10SrgbBlock, + vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock}; if (!features.textureCompressionASTC_LDR) { return false; } @@ -257,7 +259,7 @@ bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | vk::FormatFeatureFlagBits::eTransferDst}; for (const auto format : astc_formats) { - const auto format_properties{physical.getFormatProperties(format, dldi)}; + const auto format_properties{physical.getFormatProperties(format, dld)}; if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { return false; } @@ -276,11 +278,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag return (supported_usage & wanted_usage) == wanted_usage; } -bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface) { - bool is_suitable = true; - - constexpr std::array required_extensions = { +bool VKDevice::IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, + const vk::DispatchLoaderDynamic& dld) { + static constexpr std::array required_extensions = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_KHR_16BIT_STORAGE_EXTENSION_NAME, VK_KHR_8BIT_STORAGE_EXTENSION_NAME, @@ -290,9 +290,10 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, }; + bool is_suitable = true; std::bitset<required_extensions.size()> available_extensions{}; - for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dld)) { for (std::size_t i = 0; i < required_extensions.size(); ++i) { if (available_extensions[i]) { continue; @@ -312,7 +313,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } bool has_graphics{}, has_present{}; - const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + const auto queue_family_properties = physical.getQueueFamilyProperties(dld); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { const auto& family = queue_family_properties[i]; if (family.queueCount == 0) { @@ -320,7 +321,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } has_graphics |= (family.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast<vk::QueueFlagBits>(0); - has_present |= physical.getSurfaceSupportKHR(i, surface, dldi) != 0; + has_present |= physical.getSurfaceSupportKHR(i, surface, dld) != 0; } if (!has_graphics || !has_present) { LOG_ERROR(Render_Vulkan, "Device lacks a graphics and present queue"); @@ -328,7 +329,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. - const auto properties{physical.getProperties(dldi)}; + const auto properties{physical.getProperties(dld)}; const auto& limits{properties.limits}; constexpr u32 required_ubo_size = 65536; @@ -345,7 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev is_suitable = false; } - const auto features{physical.getFeatures(dldi)}; + const auto features{physical.getFeatures(dld)}; const std::array feature_report = { std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), std::make_pair(features.independentBlend, "independentBlend"), @@ -377,7 +378,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return is_suitable; } -std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { +std::vector<const char*> VKDevice::LoadExtensions() { std::vector<const char*> extensions; const auto Test = [&](const vk::ExtensionProperties& extension, std::optional<std::reference_wrapper<bool>> status, const char* name, @@ -408,7 +409,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami bool has_khr_shader_float16_int8{}; bool has_ext_subgroup_size_control{}; bool has_ext_transform_feedback{}; - for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dld)) { Test(extension, khr_uniform_buffer_standard_layout, VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, @@ -430,15 +431,15 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami if (has_khr_shader_float16_int8) { is_float16_supported = - GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16; + GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dld).shaderFloat16; extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); } if (has_ext_subgroup_size_control) { const auto features = - GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi); + GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dld); const auto properties = - GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dldi); + GetProperties<vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT>(physical, dld); is_warp_potentially_bigger = properties.maxSubgroupSize > GuestWarpSize; @@ -453,9 +454,9 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami if (has_ext_transform_feedback) { const auto features = - GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi); + GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dld); const auto properties = - GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi); + GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dld); if (features.transformFeedback && features.geometryStreams && properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers && @@ -468,10 +469,10 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami return extensions; } -void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { +void VKDevice::SetupFamilies(vk::SurfaceKHR surface) { std::optional<u32> graphics_family_, present_family_; - const auto queue_family_properties = physical.getQueueFamilyProperties(dldi); + const auto queue_family_properties = physical.getQueueFamilyProperties(dld); for (u32 i = 0; i < static_cast<u32>(queue_family_properties.size()); ++i) { if (graphics_family_ && present_family_) break; @@ -480,10 +481,12 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK if (queue_family.queueCount == 0) continue; - if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) + if (queue_family.queueFlags & vk::QueueFlagBits::eGraphics) { graphics_family_ = i; - if (physical.getSurfaceSupportKHR(i, surface, dldi)) + } + if (physical.getSurfaceSupportKHR(i, surface, dld)) { present_family_ = i; + } } ASSERT(graphics_family_ && present_family_); @@ -491,10 +494,10 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK present_family = *present_family_; } -void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { - const auto supported_features{physical.getFeatures(dldi)}; +void VKDevice::SetupFeatures() { + const auto supported_features{physical.getFeatures(dld)}; is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; - is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); } void VKDevice::CollectTelemetryParameters() { @@ -522,7 +525,7 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con } std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties( - const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical) { static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, vk::Format::eA8B8G8R8UintPack32, vk::Format::eA8B8G8R8SnormPack32, @@ -572,28 +575,38 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti vk::Format::eBc2SrgbBlock, vk::Format::eBc3SrgbBlock, vk::Format::eBc7SrgbBlock, + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x5SrgbBlock, + vk::Format::eAstc5x4UnormBlock, vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock, + vk::Format::eAstc6x5UnormBlock, + vk::Format::eAstc6x5SrgbBlock, vk::Format::eAstc6x6UnormBlock, vk::Format::eAstc6x6SrgbBlock, + vk::Format::eAstc8x5UnormBlock, + vk::Format::eAstc8x5SrgbBlock, + vk::Format::eAstc8x6UnormBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc8x8UnormBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc10x5UnormBlock, + vk::Format::eAstc10x5SrgbBlock, + vk::Format::eAstc10x6UnormBlock, + vk::Format::eAstc10x6SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock, vk::Format::eAstc10x10UnormBlock, vk::Format::eAstc10x10SrgbBlock, + vk::Format::eAstc12x10UnormBlock, + vk::Format::eAstc12x10SrgbBlock, vk::Format::eAstc12x12UnormBlock, vk::Format::eAstc12x12SrgbBlock, - vk::Format::eAstc8x6UnormBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc6x5UnormBlock, - vk::Format::eAstc6x5SrgbBlock, vk::Format::eE5B9G9R9UfloatPack32}; std::unordered_map<vk::Format, vk::FormatProperties> format_properties; for (const auto format : formats) { - format_properties.emplace(format, physical.getFormatProperties(format, dldi)); + format_properties.emplace(format, physical.getFormatProperties(format, dld)); } return format_properties; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 6e656517f..d9d809852 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -22,12 +22,12 @@ const u32 GuestWarpSize = 32; /// Handles data specific to a physical device. class VKDevice final { public: - explicit VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, + explicit VKDevice(const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical, vk::SurfaceKHR surface); ~VKDevice(); /// Initializes the device. Returns true on success. - bool Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance); + bool Create(vk::Instance instance); /** * Returns a format supported by the device for the passed requeriments. @@ -188,18 +188,18 @@ public: } /// Checks if the physical device is suitable. - static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, - vk::SurfaceKHR surface); + static bool IsSuitable(vk::PhysicalDevice physical, vk::SurfaceKHR surface, + const vk::DispatchLoaderDynamic& dld); private: /// Loads extensions into a vector and stores available ones in this object. - std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + std::vector<const char*> LoadExtensions(); /// Sets up queue families. - void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); + void SetupFamilies(vk::SurfaceKHR surface); /// Sets up device features. - void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + void SetupFeatures(); /// Collects telemetry information from the device. void CollectTelemetryParameters(); @@ -208,8 +208,7 @@ private: std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; /// Returns true if ASTC textures are natively supported. - bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, - const vk::DispatchLoaderDynamic& dldi) const; + bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features) const; /// Returns true if a format is supported. bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, @@ -217,10 +216,10 @@ private: /// Returns the device properties for Vulkan formats. static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties( - const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); + const vk::DispatchLoaderDynamic& dld, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device. vk::DispatchLoaderDynamic dld; ///< Device function pointers. + vk::PhysicalDevice physical; ///< Physical device. vk::PhysicalDeviceProperties properties; ///< Device properties. UniqueDevice logical; ///< Logical device. vk::Queue graphics_queue; ///< Main graphics queue. diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 557b9d662..c2a426aeb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, } // Anonymous namespace CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, - GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, - ProgramCode program_code, u32 main_offset) - : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, - program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, - shader_ir{this->program_code, main_offset, compiler_settings, registry}, + GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, + u32 main_offset) + : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, + registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, + compiler_settings, registry}, entries{GenerateShaderEntries(shader_ir)} {} CachedShader::~CachedShader() = default; @@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { auto& memory_manager{system.GPU().MemoryManager()}; const GPUVAddr program_addr{GetShaderAddress(system, program)}; - const auto host_ptr{memory_manager.GetPointer(program_addr)}; - auto shader = TryGet(host_ptr); + const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (!shader) { + const auto host_ptr{memory_manager.GetPointer(program_addr)}; + // No shader found - create a new one constexpr u32 stage_offset = 10; const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); - const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); - ASSERT(cpu_addr); - shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, - host_ptr, std::move(code), stage_offset); + std::move(code), stage_offset); Register(shader); } shaders[index] = std::move(shader); @@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach auto& memory_manager = system.GPU().MemoryManager(); const auto program_addr = key.shader; - const auto host_ptr = memory_manager.GetPointer(program_addr); - auto shader = TryGet(host_ptr); + const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + ASSERT(cpu_addr); + + auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; if (!shader) { // No shader found - create a new one - const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); - ASSERT(cpu_addr); + const auto host_ptr = memory_manager.GetPointer(program_addr); auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); constexpr u32 kernel_main_offset = 0; shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, - program_addr, *cpu_addr, host_ptr, std::move(code), + program_addr, *cpu_addr, std::move(code), kernel_main_offset); Register(shader); } @@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { } const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - const auto shader = TryGet(host_ptr); + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + const auto shader = TryGet(*cpu_addr); ASSERT(shader); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c4c112290..27c01732f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -113,17 +113,13 @@ namespace Vulkan { class CachedShader final : public RasterizerCacheObject { public: explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); + VAddr cpu_addr, ProgramCode program_code, u32 main_offset); ~CachedShader(); GPUVAddr GetGpuAddr() const { return gpu_addr; } - VAddr GetCpuAddr() const override { - return cpu_addr; - } - std::size_t GetSizeInBytes() const override { return program_code.size() * sizeof(u64); } @@ -149,7 +145,6 @@ private: Tegra::Engines::ShaderType stage); GPUVAddr gpu_addr{}; - VAddr cpu_addr{}; ProgramCode program_code; VideoCommon::Shader::Registry registry; VideoCommon::Shader::ShaderIR shader_ir; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 58c69b786..0a2ea4fd4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, void RasterizerVulkan::FlushAll() {} -void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return; + } texture_cache.FlushRegion(addr, size); buffer_cache.FlushRegion(addr, size); query_cache.FlushRegion(addr, size); } -void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { + if (addr == 0 || size == 0) { + return; + } texture_cache.InvalidateRegion(addr, size); pipeline_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); query_cache.InvalidateRegion(addr, size); } -void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { +void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { FlushRegion(addr, size); InvalidateRegion(addr, size); } @@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return false; } - const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; - const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; + const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; if (!surface) { return false; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3185868e9..f642dde76 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -118,9 +118,9 @@ public: void ResetCounter(VideoCore::QueryType type) override; void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; void FlushAll() override; - void FlushRegion(CacheAddr addr, u64 size) override; - void InvalidateRegion(CacheAddr addr, u64 size) override; - void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 51ecb5567..b9f9e2714 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -35,7 +35,7 @@ namespace { using Sirit::Id; using Tegra::Engines::ShaderType; using Tegra::Shader::Attribute; -using Tegra::Shader::AttributeUse; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using namespace VideoCommon::Shader; @@ -752,16 +752,16 @@ private: if (stage != ShaderType::Fragment) { continue; } - switch (header.ps.GetAttributeUse(location)) { - case AttributeUse::Constant: + switch (header.ps.GetPixelImap(location)) { + case PixelImap::Constant: Decorate(id, spv::Decoration::Flat); break; - case AttributeUse::ScreenLinear: - Decorate(id, spv::Decoration::NoPerspective); - break; - case AttributeUse::Perspective: + case PixelImap::Perspective: // Default break; + case PixelImap::ScreenLinear: + Decorate(id, spv::Decoration::NoPerspective); + break; default: UNREACHABLE_MSG("Unused attribute being fetched"); } @@ -1145,9 +1145,6 @@ private: switch (attribute) { case Attribute::Index::Position: { if (stage == ShaderType::Fragment) { - if (element == 3) { - return {Constant(t_float, 1.0f), Type::Float}; - } return {OpLoad(t_float, AccessElement(t_in_float, frag_coord, element)), Type::Float}; } @@ -1941,7 +1938,11 @@ private: return {}; } - Expression AtomicAdd(Operation operation) { + template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, + Type value_type = result_type> + Expression Atomic(Operation operation) { + const Id type_def = GetTypeDefinition(result_type); + Id pointer; if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { pointer = GetSharedMemoryPointer(*smem); @@ -1949,14 +1950,15 @@ private: pointer = GetGlobalMemoryPointer(*gmem); } else { UNREACHABLE(); - return {Constant(t_uint, 0), Type::Uint}; + return {Constant(type_def, 0), result_type}; } + const Id value = As(Visit(operation[1]), value_type); + const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); - const Id semantics = Constant(t_uint, 0U); + const Id semantics = Constant(type_def, 0); - const Id value = AsUint(Visit(operation[1])); - return {OpAtomicIAdd(t_uint, pointer, scope, semantics, value), Type::Uint}; + return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; } Expression Branch(Operation operation) { @@ -2545,7 +2547,21 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::AtomicAdd, + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, + + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 26175921b..5b9b39670 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -35,7 +35,6 @@ using VideoCore::MortonSwizzleMode; using Tegra::Texture::SwizzleSource; using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::SurfaceCompression; using VideoCore::Surface::SurfaceTarget; namespace { @@ -96,9 +95,10 @@ vk::ImageViewType GetImageViewType(SurfaceTarget target) { return {}; } -UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { +UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params, + std::size_t host_memory_size) { // TODO(Rodrigo): Move texture buffer creation to the buffer cache - const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), + const vk::BufferCreateInfo buffer_ci({}, host_memory_size, vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst, @@ -110,12 +110,13 @@ UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, const SurfaceParams& params, - vk::Buffer buffer) { + vk::Buffer buffer, + std::size_t host_memory_size) { ASSERT(params.IsBuffer()); const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; - return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); + return vk::BufferViewCreateInfo({}, buffer, format, 0, host_memory_size); } vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { @@ -169,14 +170,15 @@ CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, - staging_pool{staging_pool} { + : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, + device{device}, resource_manager{resource_manager}, + memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { if (params.IsBuffer()) { - buffer = CreateBuffer(device, params); + buffer = CreateBuffer(device, params, host_memory_size); commit = memory_manager.Commit(*buffer, false); - const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer); + const auto buffer_view_ci = + GenerateBufferViewCreateInfo(device, params, *buffer, host_memory_size); format = buffer_view_ci.format; const auto dev = device.GetLogical(); @@ -255,7 +257,7 @@ void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, - size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) { + size = host_memory_size](auto cmdbuf, auto& dld) { const vk::BufferCopy copy(0, 0, size); cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); @@ -299,10 +301,7 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; - const auto compression_type = params.GetCompressionType(); - const std::size_t mip_offset = compression_type == SurfaceCompression::Converted - ? params.GetConvertedMipmapOffset(level) - : params.GetHostMipmapLevelOffset(level); + const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level, is_converted); return vk::BufferImageCopy( mip_offset, 0, 0, @@ -390,8 +389,9 @@ VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterf const VKDevice& device, VKResourceManager& resource_manager, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {} + : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, + resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, + staging_pool{staging_pool} {} VKTextureCache::~VKTextureCache() = default; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp new file mode 100644 index 000000000..9b94dfff1 --- /dev/null +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -0,0 +1,750 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <exception> +#include <memory> +#include <optional> +#include <utility> +#include <vector> + +#include "common/common_types.h" + +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan::vk { + +namespace { + +template <typename T> +bool Proc(T& result, const InstanceDispatch& dld, const char* proc_name, + VkInstance instance = nullptr) noexcept { + result = reinterpret_cast<T>(dld.vkGetInstanceProcAddr(instance, proc_name)); + return result != nullptr; +} + +template <typename T> +void Proc(T& result, const DeviceDispatch& dld, const char* proc_name, VkDevice device) noexcept { + result = reinterpret_cast<T>(dld.vkGetDeviceProcAddr(device, proc_name)); +} + +void Load(VkDevice device, DeviceDispatch& dld) noexcept { +#define X(name) Proc(dld.name, dld, #name, device) + X(vkAcquireNextImageKHR); + X(vkAllocateCommandBuffers); + X(vkAllocateDescriptorSets); + X(vkAllocateMemory); + X(vkBeginCommandBuffer); + X(vkBindBufferMemory); + X(vkBindImageMemory); + X(vkCmdBeginQuery); + X(vkCmdBeginRenderPass); + X(vkCmdBeginTransformFeedbackEXT); + X(vkCmdBindDescriptorSets); + X(vkCmdBindIndexBuffer); + X(vkCmdBindPipeline); + X(vkCmdBindTransformFeedbackBuffersEXT); + X(vkCmdBindVertexBuffers); + X(vkCmdBlitImage); + X(vkCmdClearAttachments); + X(vkCmdCopyBuffer); + X(vkCmdCopyBufferToImage); + X(vkCmdCopyImage); + X(vkCmdCopyImageToBuffer); + X(vkCmdDispatch); + X(vkCmdDraw); + X(vkCmdDrawIndexed); + X(vkCmdEndQuery); + X(vkCmdEndRenderPass); + X(vkCmdEndTransformFeedbackEXT); + X(vkCmdFillBuffer); + X(vkCmdPipelineBarrier); + X(vkCmdPushConstants); + X(vkCmdSetBlendConstants); + X(vkCmdSetCheckpointNV); + X(vkCmdSetDepthBias); + X(vkCmdSetDepthBounds); + X(vkCmdSetScissor); + X(vkCmdSetStencilCompareMask); + X(vkCmdSetStencilReference); + X(vkCmdSetStencilWriteMask); + X(vkCmdSetViewport); + X(vkCreateBuffer); + X(vkCreateBufferView); + X(vkCreateCommandPool); + X(vkCreateComputePipelines); + X(vkCreateDescriptorPool); + X(vkCreateDescriptorSetLayout); + X(vkCreateDescriptorUpdateTemplateKHR); + X(vkCreateFence); + X(vkCreateFramebuffer); + X(vkCreateGraphicsPipelines); + X(vkCreateImage); + X(vkCreateImageView); + X(vkCreatePipelineLayout); + X(vkCreateQueryPool); + X(vkCreateRenderPass); + X(vkCreateSampler); + X(vkCreateSemaphore); + X(vkCreateShaderModule); + X(vkCreateSwapchainKHR); + X(vkDestroyBuffer); + X(vkDestroyBufferView); + X(vkDestroyCommandPool); + X(vkDestroyDescriptorPool); + X(vkDestroyDescriptorSetLayout); + X(vkDestroyDescriptorUpdateTemplateKHR); + X(vkDestroyFence); + X(vkDestroyFramebuffer); + X(vkDestroyImage); + X(vkDestroyImageView); + X(vkDestroyPipeline); + X(vkDestroyPipelineLayout); + X(vkDestroyQueryPool); + X(vkDestroyRenderPass); + X(vkDestroySampler); + X(vkDestroySemaphore); + X(vkDestroyShaderModule); + X(vkDestroySwapchainKHR); + X(vkDeviceWaitIdle); + X(vkEndCommandBuffer); + X(vkFreeCommandBuffers); + X(vkFreeDescriptorSets); + X(vkFreeMemory); + X(vkGetBufferMemoryRequirements); + X(vkGetDeviceQueue); + X(vkGetFenceStatus); + X(vkGetImageMemoryRequirements); + X(vkGetQueryPoolResults); + X(vkGetQueueCheckpointDataNV); + X(vkMapMemory); + X(vkQueueSubmit); + X(vkResetFences); + X(vkResetQueryPoolEXT); + X(vkUnmapMemory); + X(vkUpdateDescriptorSetWithTemplateKHR); + X(vkUpdateDescriptorSets); + X(vkWaitForFences); +#undef X +} + +} // Anonymous namespace + +bool Load(InstanceDispatch& dld) noexcept { +#define X(name) Proc(dld.name, dld, #name) + return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); +#undef X +} + +bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { +#define X(name) Proc(dld.name, dld, #name, instance) + // These functions may fail to load depending on the enabled extensions. + // Don't return a failure on these. + X(vkCreateDebugUtilsMessengerEXT); + X(vkDestroyDebugUtilsMessengerEXT); + X(vkDestroySurfaceKHR); + X(vkGetPhysicalDeviceFeatures2KHR); + X(vkGetPhysicalDeviceProperties2KHR); + X(vkGetPhysicalDeviceSurfaceCapabilitiesKHR); + X(vkGetPhysicalDeviceSurfaceFormatsKHR); + X(vkGetPhysicalDeviceSurfacePresentModesKHR); + X(vkGetPhysicalDeviceSurfaceSupportKHR); + X(vkGetSwapchainImagesKHR); + X(vkQueuePresentKHR); + + return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) && + X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) && + X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) && + X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceProperties) && + X(vkGetPhysicalDeviceQueueFamilyProperties); +#undef X +} + +const char* Exception::what() const noexcept { + return ToString(result); +} + +const char* ToString(VkResult result) noexcept { + switch (result) { + case VkResult::VK_SUCCESS: + return "VK_SUCCESS"; + case VkResult::VK_NOT_READY: + return "VK_NOT_READY"; + case VkResult::VK_TIMEOUT: + return "VK_TIMEOUT"; + case VkResult::VK_EVENT_SET: + return "VK_EVENT_SET"; + case VkResult::VK_EVENT_RESET: + return "VK_EVENT_RESET"; + case VkResult::VK_INCOMPLETE: + return "VK_INCOMPLETE"; + case VkResult::VK_ERROR_OUT_OF_HOST_MEMORY: + return "VK_ERROR_OUT_OF_HOST_MEMORY"; + case VkResult::VK_ERROR_OUT_OF_DEVICE_MEMORY: + return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; + case VkResult::VK_ERROR_INITIALIZATION_FAILED: + return "VK_ERROR_INITIALIZATION_FAILED"; + case VkResult::VK_ERROR_DEVICE_LOST: + return "VK_ERROR_DEVICE_LOST"; + case VkResult::VK_ERROR_MEMORY_MAP_FAILED: + return "VK_ERROR_MEMORY_MAP_FAILED"; + case VkResult::VK_ERROR_LAYER_NOT_PRESENT: + return "VK_ERROR_LAYER_NOT_PRESENT"; + case VkResult::VK_ERROR_EXTENSION_NOT_PRESENT: + return "VK_ERROR_EXTENSION_NOT_PRESENT"; + case VkResult::VK_ERROR_FEATURE_NOT_PRESENT: + return "VK_ERROR_FEATURE_NOT_PRESENT"; + case VkResult::VK_ERROR_INCOMPATIBLE_DRIVER: + return "VK_ERROR_INCOMPATIBLE_DRIVER"; + case VkResult::VK_ERROR_TOO_MANY_OBJECTS: + return "VK_ERROR_TOO_MANY_OBJECTS"; + case VkResult::VK_ERROR_FORMAT_NOT_SUPPORTED: + return "VK_ERROR_FORMAT_NOT_SUPPORTED"; + case VkResult::VK_ERROR_FRAGMENTED_POOL: + return "VK_ERROR_FRAGMENTED_POOL"; + case VkResult::VK_ERROR_OUT_OF_POOL_MEMORY: + return "VK_ERROR_OUT_OF_POOL_MEMORY"; + case VkResult::VK_ERROR_INVALID_EXTERNAL_HANDLE: + return "VK_ERROR_INVALID_EXTERNAL_HANDLE"; + case VkResult::VK_ERROR_SURFACE_LOST_KHR: + return "VK_ERROR_SURFACE_LOST_KHR"; + case VkResult::VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: + return "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"; + case VkResult::VK_SUBOPTIMAL_KHR: + return "VK_SUBOPTIMAL_KHR"; + case VkResult::VK_ERROR_OUT_OF_DATE_KHR: + return "VK_ERROR_OUT_OF_DATE_KHR"; + case VkResult::VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: + return "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"; + case VkResult::VK_ERROR_VALIDATION_FAILED_EXT: + return "VK_ERROR_VALIDATION_FAILED_EXT"; + case VkResult::VK_ERROR_INVALID_SHADER_NV: + return "VK_ERROR_INVALID_SHADER_NV"; + case VkResult::VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT: + return "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT"; + case VkResult::VK_ERROR_FRAGMENTATION_EXT: + return "VK_ERROR_FRAGMENTATION_EXT"; + case VkResult::VK_ERROR_NOT_PERMITTED_EXT: + return "VK_ERROR_NOT_PERMITTED_EXT"; + case VkResult::VK_ERROR_INVALID_DEVICE_ADDRESS_EXT: + return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"; + case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: + return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; + } + return "Unknown"; +} + +void Destroy(VkInstance instance, const InstanceDispatch& dld) noexcept { + dld.vkDestroyInstance(instance, nullptr); +} + +void Destroy(VkDevice device, const InstanceDispatch& dld) noexcept { + dld.vkDestroyDevice(device, nullptr); +} + +void Destroy(VkDevice device, VkBuffer handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyBuffer(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkBufferView handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyBufferView(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkCommandPool handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyCommandPool(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDescriptorPool handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyDescriptorPool(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDescriptorSetLayout handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyDescriptorSetLayout(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDescriptorUpdateTemplateKHR handle, + const DeviceDispatch& dld) noexcept { + dld.vkDestroyDescriptorUpdateTemplateKHR(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) noexcept { + dld.vkFreeMemory(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyFence(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkFramebuffer handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyFramebuffer(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkImage handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyImage(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkImageView handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyImageView(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyPipeline(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyPipelineLayout(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkQueryPool handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyQueryPool(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkRenderPass handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyRenderPass(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkSampler handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroySampler(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkSwapchainKHR handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroySwapchainKHR(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkSemaphore handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroySemaphore(device, handle, nullptr); +} + +void Destroy(VkDevice device, VkShaderModule handle, const DeviceDispatch& dld) noexcept { + dld.vkDestroyShaderModule(device, handle, nullptr); +} + +void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle, + const InstanceDispatch& dld) noexcept { + dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); +} + +void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { + dld.vkDestroySurfaceKHR(instance, handle, nullptr); +} + +VkResult Free(VkDevice device, VkDescriptorPool handle, Span<VkDescriptorSet> sets, + const DeviceDispatch& dld) noexcept { + return dld.vkFreeDescriptorSets(device, handle, sets.size(), sets.data()); +} + +VkResult Free(VkDevice device, VkCommandPool handle, Span<VkCommandBuffer> buffers, + const DeviceDispatch& dld) noexcept { + dld.vkFreeCommandBuffers(device, handle, buffers.size(), buffers.data()); + return VK_SUCCESS; +} + +Instance Instance::Create(Span<const char*> layers, Span<const char*> extensions, + InstanceDispatch& dld) noexcept { + VkApplicationInfo application_info; + application_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + application_info.pNext = nullptr; + application_info.pApplicationName = "yuzu Emulator"; + application_info.applicationVersion = VK_MAKE_VERSION(0, 1, 0); + application_info.pEngineName = "yuzu Emulator"; + application_info.engineVersion = VK_MAKE_VERSION(0, 1, 0); + application_info.apiVersion = VK_API_VERSION_1_1; + + VkInstanceCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + ci.pApplicationInfo = &application_info; + ci.enabledLayerCount = layers.size(); + ci.ppEnabledLayerNames = layers.data(); + ci.enabledExtensionCount = extensions.size(); + ci.ppEnabledExtensionNames = extensions.data(); + + VkInstance instance; + if (dld.vkCreateInstance(&ci, nullptr, &instance) != VK_SUCCESS) { + // Failed to create the instance. + return {}; + } + if (!Proc(dld.vkDestroyInstance, dld, "vkDestroyInstance", instance)) { + // We successfully created an instance but the destroy function couldn't be loaded. + // This is a good moment to panic. + return {}; + } + + return Instance(instance, dld); +} + +std::optional<std::vector<VkPhysicalDevice>> Instance::EnumeratePhysicalDevices() { + u32 num; + if (dld->vkEnumeratePhysicalDevices(handle, &num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkPhysicalDevice> physical_devices(num); + if (dld->vkEnumeratePhysicalDevices(handle, &num, physical_devices.data()) != VK_SUCCESS) { + return std::nullopt; + } + return physical_devices; +} + +DebugCallback Instance::TryCreateDebugCallback( + PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept { + VkDebugUtilsMessengerCreateInfoEXT ci; + ci.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + ci.pNext = nullptr; + ci.flags = 0; + ci.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; + ci.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + ci.pfnUserCallback = callback; + ci.pUserData = nullptr; + + VkDebugUtilsMessengerEXT messenger; + if (dld->vkCreateDebugUtilsMessengerEXT(handle, &ci, nullptr, &messenger) != VK_SUCCESS) { + return {}; + } + return DebugCallback(messenger, handle, *dld); +} + +std::vector<VkCheckpointDataNV> Queue::GetCheckpointDataNV(const DeviceDispatch& dld) const { + if (!dld.vkGetQueueCheckpointDataNV) { + return {}; + } + u32 num; + dld.vkGetQueueCheckpointDataNV(queue, &num, nullptr); + std::vector<VkCheckpointDataNV> checkpoints(num); + dld.vkGetQueueCheckpointDataNV(queue, &num, checkpoints.data()); + return checkpoints; +} + +void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { + Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); +} + +void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { + Check(dld->vkBindImageMemory(owner, handle, memory, offset)); +} + +DescriptorSets DescriptorPool::Allocate(const VkDescriptorSetAllocateInfo& ai) const { + const std::size_t num = ai.descriptorSetCount; + std::unique_ptr sets = std::make_unique<VkDescriptorSet[]>(num); + switch (const VkResult result = dld->vkAllocateDescriptorSets(owner, &ai, sets.get())) { + case VK_SUCCESS: + return DescriptorSets(std::move(sets), num, owner, handle, *dld); + case VK_ERROR_OUT_OF_POOL_MEMORY: + return {}; + default: + throw Exception(result); + } +} + +CommandBuffers CommandPool::Allocate(std::size_t num_buffers, VkCommandBufferLevel level) const { + VkCommandBufferAllocateInfo ai; + ai.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + ai.pNext = nullptr; + ai.commandPool = handle; + ai.level = level; + ai.commandBufferCount = static_cast<u32>(num_buffers); + + std::unique_ptr buffers = std::make_unique<VkCommandBuffer[]>(num_buffers); + switch (const VkResult result = dld->vkAllocateCommandBuffers(owner, &ai, buffers.get())) { + case VK_SUCCESS: + return CommandBuffers(std::move(buffers), num_buffers, owner, handle, *dld); + case VK_ERROR_OUT_OF_POOL_MEMORY: + return {}; + default: + throw Exception(result); + } +} + +std::vector<VkImage> SwapchainKHR::GetImages() const { + u32 num; + Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, nullptr)); + std::vector<VkImage> images(num); + Check(dld->vkGetSwapchainImagesKHR(owner, handle, &num, images.data())); + return images; +} + +Device Device::Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, + Span<const char*> enabled_extensions, + const VkPhysicalDeviceFeatures2& enabled_features, + DeviceDispatch& dld) noexcept { + VkDeviceCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + ci.pNext = &enabled_features; + ci.flags = 0; + ci.queueCreateInfoCount = queues_ci.size(); + ci.pQueueCreateInfos = queues_ci.data(); + ci.enabledLayerCount = 0; + ci.ppEnabledLayerNames = nullptr; + ci.enabledExtensionCount = enabled_extensions.size(); + ci.ppEnabledExtensionNames = enabled_extensions.data(); + ci.pEnabledFeatures = nullptr; + + VkDevice device; + if (dld.vkCreateDevice(physical_device, &ci, nullptr, &device) != VK_SUCCESS) { + return {}; + } + Load(device, dld); + return Device(device, dld); +} + +Queue Device::GetQueue(u32 family_index) const noexcept { + VkQueue queue; + dld->vkGetDeviceQueue(handle, family_index, 0, &queue); + return Queue(queue, *dld); +} + +Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const { + VkBuffer object; + Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); + return Buffer(object, handle, *dld); +} + +BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { + VkBufferView object; + Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); + return BufferView(object, handle, *dld); +} + +Image Device::CreateImage(const VkImageCreateInfo& ci) const { + VkImage object; + Check(dld->vkCreateImage(handle, &ci, nullptr, &object)); + return Image(object, handle, *dld); +} + +ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { + VkImageView object; + Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); + return ImageView(object, handle, *dld); +} + +Semaphore Device::CreateSemaphore() const { + VkSemaphoreCreateInfo ci; + ci.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + ci.pNext = nullptr; + ci.flags = 0; + + VkSemaphore object; + Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); + return Semaphore(object, handle, *dld); +} + +Fence Device::CreateFence(const VkFenceCreateInfo& ci) const { + VkFence object; + Check(dld->vkCreateFence(handle, &ci, nullptr, &object)); + return Fence(object, handle, *dld); +} + +DescriptorPool Device::CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const { + VkDescriptorPool object; + Check(dld->vkCreateDescriptorPool(handle, &ci, nullptr, &object)); + return DescriptorPool(object, handle, *dld); +} + +RenderPass Device::CreateRenderPass(const VkRenderPassCreateInfo& ci) const { + VkRenderPass object; + Check(dld->vkCreateRenderPass(handle, &ci, nullptr, &object)); + return RenderPass(object, handle, *dld); +} + +DescriptorSetLayout Device::CreateDescriptorSetLayout( + const VkDescriptorSetLayoutCreateInfo& ci) const { + VkDescriptorSetLayout object; + Check(dld->vkCreateDescriptorSetLayout(handle, &ci, nullptr, &object)); + return DescriptorSetLayout(object, handle, *dld); +} + +PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { + VkPipelineLayout object; + Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); + return PipelineLayout(object, handle, *dld); +} + +Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { + VkPipeline object; + Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); + return Pipeline(object, handle, *dld); +} + +Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { + VkPipeline object; + Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); + return Pipeline(object, handle, *dld); +} + +Sampler Device::CreateSampler(const VkSamplerCreateInfo& ci) const { + VkSampler object; + Check(dld->vkCreateSampler(handle, &ci, nullptr, &object)); + return Sampler(object, handle, *dld); +} + +Framebuffer Device::CreateFramebuffer(const VkFramebufferCreateInfo& ci) const { + VkFramebuffer object; + Check(dld->vkCreateFramebuffer(handle, &ci, nullptr, &object)); + return Framebuffer(object, handle, *dld); +} + +CommandPool Device::CreateCommandPool(const VkCommandPoolCreateInfo& ci) const { + VkCommandPool object; + Check(dld->vkCreateCommandPool(handle, &ci, nullptr, &object)); + return CommandPool(object, handle, *dld); +} + +DescriptorUpdateTemplateKHR Device::CreateDescriptorUpdateTemplateKHR( + const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const { + VkDescriptorUpdateTemplateKHR object; + Check(dld->vkCreateDescriptorUpdateTemplateKHR(handle, &ci, nullptr, &object)); + return DescriptorUpdateTemplateKHR(object, handle, *dld); +} + +QueryPool Device::CreateQueryPool(const VkQueryPoolCreateInfo& ci) const { + VkQueryPool object; + Check(dld->vkCreateQueryPool(handle, &ci, nullptr, &object)); + return QueryPool(object, handle, *dld); +} + +ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) const { + VkShaderModule object; + Check(dld->vkCreateShaderModule(handle, &ci, nullptr, &object)); + return ShaderModule(object, handle, *dld); +} + +SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { + VkSwapchainKHR object; + Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); + return SwapchainKHR(object, handle, *dld); +} + +DeviceMemory Device::TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept { + VkDeviceMemory memory; + if (dld->vkAllocateMemory(handle, &ai, nullptr, &memory) != VK_SUCCESS) { + return {}; + } + return DeviceMemory(memory, handle, *dld); +} + +DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { + VkDeviceMemory memory; + Check(dld->vkAllocateMemory(handle, &ai, nullptr, &memory)); + return DeviceMemory(memory, handle, *dld); +} + +VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { + VkMemoryRequirements requirements; + dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); + return requirements; +} + +VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { + VkMemoryRequirements requirements; + dld->vkGetImageMemoryRequirements(handle, image, &requirements); + return requirements; +} + +void Device::UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, + Span<VkCopyDescriptorSet> copies) const noexcept { + dld->vkUpdateDescriptorSets(handle, writes.size(), writes.data(), copies.size(), copies.data()); +} + +VkPhysicalDeviceProperties PhysicalDevice::GetProperties() const noexcept { + VkPhysicalDeviceProperties properties; + dld->vkGetPhysicalDeviceProperties(physical_device, &properties); + return properties; +} + +void PhysicalDevice::GetProperties2KHR(VkPhysicalDeviceProperties2KHR& properties) const noexcept { + dld->vkGetPhysicalDeviceProperties2KHR(physical_device, &properties); +} + +VkPhysicalDeviceFeatures PhysicalDevice::GetFeatures() const noexcept { + VkPhysicalDeviceFeatures2KHR features2; + features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; + features2.pNext = nullptr; + dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features2); + return features2.features; +} + +void PhysicalDevice::GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR& features) const noexcept { + dld->vkGetPhysicalDeviceFeatures2KHR(physical_device, &features); +} + +VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const noexcept { + VkFormatProperties properties; + dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties); + return properties; +} + +std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const { + u32 num; + dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr); + std::vector<VkExtensionProperties> properties(num); + dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, properties.data()); + return properties; +} + +std::vector<VkQueueFamilyProperties> PhysicalDevice::GetQueueFamilyProperties() const { + u32 num; + dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, nullptr); + std::vector<VkQueueFamilyProperties> properties(num); + dld->vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &num, properties.data()); + return properties; +} + +bool PhysicalDevice::GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR surface) const { + VkBool32 supported; + Check(dld->vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, queue_family_index, surface, + &supported)); + return supported == VK_TRUE; +} + +VkSurfaceCapabilitiesKHR PhysicalDevice::GetSurfaceCapabilitiesKHR(VkSurfaceKHR surface) const + noexcept { + VkSurfaceCapabilitiesKHR capabilities; + Check(dld->vkGetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface, &capabilities)); + return capabilities; +} + +std::vector<VkSurfaceFormatKHR> PhysicalDevice::GetSurfaceFormatsKHR(VkSurfaceKHR surface) const { + u32 num; + Check(dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, nullptr)); + std::vector<VkSurfaceFormatKHR> formats(num); + Check( + dld->vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, &num, formats.data())); + return formats; +} + +std::vector<VkPresentModeKHR> PhysicalDevice::GetSurfacePresentModesKHR( + VkSurfaceKHR surface) const { + u32 num; + Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, nullptr)); + std::vector<VkPresentModeKHR> modes(num); + Check(dld->vkGetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface, &num, + modes.data())); + return modes; +} + +VkPhysicalDeviceMemoryProperties PhysicalDevice::GetMemoryProperties() const noexcept { + VkPhysicalDeviceMemoryProperties properties; + dld->vkGetPhysicalDeviceMemoryProperties(physical_device, &properties); + return properties; +} + +std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkExtensionProperties> properties(num); + if (dld.vkEnumerateInstanceExtensionProperties(nullptr, &num, properties.data()) != + VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + +} // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h new file mode 100644 index 000000000..fb3657819 --- /dev/null +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -0,0 +1,987 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <exception> +#include <iterator> +#include <limits> +#include <memory> +#include <optional> +#include <type_traits> +#include <utility> +#include <vector> + +#define VK_NO_PROTOTYPES +#include <vulkan/vulkan.h> + +#include "common/common_types.h" + +namespace Vulkan::vk { + +/** + * Span for Vulkan arrays. + * Based on std::span but optimized for array access instead of iterators. + * Size returns uint32_t instead of size_t to ease interaction with Vulkan functions. + */ +template <typename T> +class Span { +public: + using value_type = T; + using size_type = u32; + using difference_type = std::ptrdiff_t; + using reference = const T&; + using const_reference = const T&; + using pointer = const T*; + using const_pointer = const T*; + using iterator = const T*; + using const_iterator = const T*; + + /// Construct an empty span. + constexpr Span() noexcept = default; + + /// Construct a span from a single element. + constexpr Span(const T& value) noexcept : ptr{&value}, num{1} {} + + /// Construct a span from a range. + template <typename Range> + // requires std::data(const Range&) + // requires std::size(const Range&) + constexpr Span(const Range& range) : ptr{std::data(range)}, num{std::size(range)} {} + + /// Construct a span from a pointer and a size. + /// This is inteded for subranges. + constexpr Span(const T* ptr, std::size_t num) noexcept : ptr{ptr}, num{num} {} + + /// Returns the data pointer by the span. + constexpr const T* data() const noexcept { + return ptr; + } + + /// Returns the number of elements in the span. + /// @note Returns a 32 bits integer because most Vulkan functions expect this type. + constexpr u32 size() const noexcept { + return static_cast<u32>(num); + } + + /// Returns true when the span is empty. + constexpr bool empty() const noexcept { + return num == 0; + } + + /// Returns a reference to the element in the passed index. + /// @pre: index < size() + constexpr const T& operator[](std::size_t index) const noexcept { + return ptr[index]; + } + + /// Returns an iterator to the beginning of the span. + constexpr const T* begin() const noexcept { + return ptr; + } + + /// Returns an iterator to the end of the span. + constexpr const T* end() const noexcept { + return ptr + num; + } + + /// Returns an iterator to the beginning of the span. + constexpr const T* cbegin() const noexcept { + return ptr; + } + + /// Returns an iterator to the end of the span. + constexpr const T* cend() const noexcept { + return ptr + num; + } + +private: + const T* ptr = nullptr; + std::size_t num = 0; +}; + +/// Vulkan exception generated from a VkResult. +class Exception final : public std::exception { +public: + /// Construct the exception with a result. + /// @pre result != VK_SUCCESS + explicit Exception(VkResult result_) : result{result_} {} + virtual ~Exception() = default; + + const char* what() const noexcept override; + +private: + VkResult result; +}; + +/// Converts a VkResult enum into a rodata string +const char* ToString(VkResult) noexcept; + +/// Throws a Vulkan exception if result is not success. +inline void Check(VkResult result) { + if (result != VK_SUCCESS) { + throw Exception(result); + } +} + +/// Throws a Vulkan exception if result is an error. +/// @return result +inline VkResult Filter(VkResult result) { + if (result < 0) { + throw Exception(result); + } + return result; +} + +/// Table holding Vulkan instance function pointers. +struct InstanceDispatch { + PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr; + + PFN_vkCreateInstance vkCreateInstance; + PFN_vkDestroyInstance vkDestroyInstance; + PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + + PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; + PFN_vkCreateDevice vkCreateDevice; + PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT; + PFN_vkDestroyDevice vkDestroyDevice; + PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR; + PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties; + PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices; + PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr; + PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR; + PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties; + PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties; + PFN_vkGetPhysicalDeviceProperties vkGetPhysicalDeviceProperties; + PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR; + PFN_vkGetPhysicalDeviceQueueFamilyProperties vkGetPhysicalDeviceQueueFamilyProperties; + PFN_vkGetPhysicalDeviceSurfaceCapabilitiesKHR vkGetPhysicalDeviceSurfaceCapabilitiesKHR; + PFN_vkGetPhysicalDeviceSurfaceFormatsKHR vkGetPhysicalDeviceSurfaceFormatsKHR; + PFN_vkGetPhysicalDeviceSurfacePresentModesKHR vkGetPhysicalDeviceSurfacePresentModesKHR; + PFN_vkGetPhysicalDeviceSurfaceSupportKHR vkGetPhysicalDeviceSurfaceSupportKHR; + PFN_vkGetSwapchainImagesKHR vkGetSwapchainImagesKHR; + PFN_vkQueuePresentKHR vkQueuePresentKHR; +}; + +/// Table holding Vulkan device function pointers. +struct DeviceDispatch : public InstanceDispatch { + PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR; + PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers; + PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets; + PFN_vkAllocateMemory vkAllocateMemory; + PFN_vkBeginCommandBuffer vkBeginCommandBuffer; + PFN_vkBindBufferMemory vkBindBufferMemory; + PFN_vkBindImageMemory vkBindImageMemory; + PFN_vkCmdBeginQuery vkCmdBeginQuery; + PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass; + PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT; + PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets; + PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer; + PFN_vkCmdBindPipeline vkCmdBindPipeline; + PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT; + PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers; + PFN_vkCmdBlitImage vkCmdBlitImage; + PFN_vkCmdClearAttachments vkCmdClearAttachments; + PFN_vkCmdCopyBuffer vkCmdCopyBuffer; + PFN_vkCmdCopyBufferToImage vkCmdCopyBufferToImage; + PFN_vkCmdCopyImage vkCmdCopyImage; + PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer; + PFN_vkCmdDispatch vkCmdDispatch; + PFN_vkCmdDraw vkCmdDraw; + PFN_vkCmdDrawIndexed vkCmdDrawIndexed; + PFN_vkCmdEndQuery vkCmdEndQuery; + PFN_vkCmdEndRenderPass vkCmdEndRenderPass; + PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT; + PFN_vkCmdFillBuffer vkCmdFillBuffer; + PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier; + PFN_vkCmdPushConstants vkCmdPushConstants; + PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; + PFN_vkCmdSetCheckpointNV vkCmdSetCheckpointNV; + PFN_vkCmdSetDepthBias vkCmdSetDepthBias; + PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; + PFN_vkCmdSetScissor vkCmdSetScissor; + PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; + PFN_vkCmdSetStencilReference vkCmdSetStencilReference; + PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask; + PFN_vkCmdSetViewport vkCmdSetViewport; + PFN_vkCreateBuffer vkCreateBuffer; + PFN_vkCreateBufferView vkCreateBufferView; + PFN_vkCreateCommandPool vkCreateCommandPool; + PFN_vkCreateComputePipelines vkCreateComputePipelines; + PFN_vkCreateDescriptorPool vkCreateDescriptorPool; + PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; + PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; + PFN_vkCreateFence vkCreateFence; + PFN_vkCreateFramebuffer vkCreateFramebuffer; + PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; + PFN_vkCreateImage vkCreateImage; + PFN_vkCreateImageView vkCreateImageView; + PFN_vkCreatePipelineLayout vkCreatePipelineLayout; + PFN_vkCreateQueryPool vkCreateQueryPool; + PFN_vkCreateRenderPass vkCreateRenderPass; + PFN_vkCreateSampler vkCreateSampler; + PFN_vkCreateSemaphore vkCreateSemaphore; + PFN_vkCreateShaderModule vkCreateShaderModule; + PFN_vkCreateSwapchainKHR vkCreateSwapchainKHR; + PFN_vkDestroyBuffer vkDestroyBuffer; + PFN_vkDestroyBufferView vkDestroyBufferView; + PFN_vkDestroyCommandPool vkDestroyCommandPool; + PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; + PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; + PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; + PFN_vkDestroyFence vkDestroyFence; + PFN_vkDestroyFramebuffer vkDestroyFramebuffer; + PFN_vkDestroyImage vkDestroyImage; + PFN_vkDestroyImageView vkDestroyImageView; + PFN_vkDestroyPipeline vkDestroyPipeline; + PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout; + PFN_vkDestroyQueryPool vkDestroyQueryPool; + PFN_vkDestroyRenderPass vkDestroyRenderPass; + PFN_vkDestroySampler vkDestroySampler; + PFN_vkDestroySemaphore vkDestroySemaphore; + PFN_vkDestroyShaderModule vkDestroyShaderModule; + PFN_vkDestroySwapchainKHR vkDestroySwapchainKHR; + PFN_vkDeviceWaitIdle vkDeviceWaitIdle; + PFN_vkEndCommandBuffer vkEndCommandBuffer; + PFN_vkFreeCommandBuffers vkFreeCommandBuffers; + PFN_vkFreeDescriptorSets vkFreeDescriptorSets; + PFN_vkFreeMemory vkFreeMemory; + PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; + PFN_vkGetDeviceQueue vkGetDeviceQueue; + PFN_vkGetFenceStatus vkGetFenceStatus; + PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; + PFN_vkGetQueryPoolResults vkGetQueryPoolResults; + PFN_vkGetQueueCheckpointDataNV vkGetQueueCheckpointDataNV; + PFN_vkMapMemory vkMapMemory; + PFN_vkQueueSubmit vkQueueSubmit; + PFN_vkResetFences vkResetFences; + PFN_vkResetQueryPoolEXT vkResetQueryPoolEXT; + PFN_vkUnmapMemory vkUnmapMemory; + PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; + PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; + PFN_vkWaitForFences vkWaitForFences; +}; + +/// Loads instance agnostic function pointers. +/// @return True on success, false on error. +bool Load(InstanceDispatch&) noexcept; + +/// Loads instance function pointers. +/// @return True on success, false on error. +bool Load(VkInstance, InstanceDispatch&) noexcept; + +void Destroy(VkInstance, const InstanceDispatch&) noexcept; +void Destroy(VkDevice, const InstanceDispatch&) noexcept; + +void Destroy(VkDevice, VkBuffer, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkBufferView, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkCommandPool, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkSampler, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; +void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; +void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; +void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; + +VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept; +VkResult Free(VkDevice, VkCommandPool, Span<VkCommandBuffer>, const DeviceDispatch&) noexcept; + +template <typename Type, typename OwnerType, typename Dispatch> +class Handle; + +/// Handle with an owning type. +/// Analogue to std::unique_ptr. +template <typename Type, typename OwnerType, typename Dispatch> +class Handle { +public: + /// Construct a handle and hold it's ownership. + explicit Handle(Type handle_, OwnerType owner_, const Dispatch& dld_) noexcept + : handle{handle_}, owner{owner_}, dld{&dld_} {} + + /// Construct an empty handle. + Handle() = default; + + /// Copying Vulkan objects is not supported and will never be. + Handle(const Handle&) = delete; + Handle& operator=(const Handle&) = delete; + + /// Construct a handle transfering the ownership from another handle. + Handle(Handle&& rhs) noexcept + : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, dld{rhs.dld} {} + + /// Assign the current handle transfering the ownership from another handle. + /// Destroys any previously held object. + Handle& operator=(Handle&& rhs) noexcept { + Release(); + handle = std::exchange(rhs.handle, nullptr); + owner = rhs.owner; + dld = rhs.dld; + return *this; + } + + /// Destroys the current handle if it existed. + ~Handle() noexcept { + Release(); + } + + /// Destroys any held object. + void reset() noexcept { + Release(); + handle = nullptr; + } + + /// Returns the address of the held object. + /// Intended for Vulkan structures that expect a pointer to an array. + const Type* address() const noexcept { + return std::addressof(handle); + } + + /// Returns the held Vulkan handle. + Type operator*() const noexcept { + return handle; + } + + /// Returns true when there's a held object. + explicit operator bool() const noexcept { + return handle != nullptr; + } + +protected: + Type handle = nullptr; + OwnerType owner = nullptr; + const Dispatch* dld = nullptr; + +private: + /// Destroys the held object if it exists. + void Release() noexcept { + if (handle) { + Destroy(owner, handle, *dld); + } + } +}; + +/// Dummy type used to specify a handle has no owner. +struct NoOwner {}; + +/// Handle without an owning type. +/// Analogue to std::unique_ptr +template <typename Type, typename Dispatch> +class Handle<Type, NoOwner, Dispatch> { +public: + /// Construct a handle and hold it's ownership. + explicit Handle(Type handle_, const Dispatch& dld_) noexcept : handle{handle_}, dld{&dld_} {} + + /// Construct an empty handle. + Handle() noexcept = default; + + /// Copying Vulkan objects is not supported and will never be. + Handle(const Handle&) = delete; + Handle& operator=(const Handle&) = delete; + + /// Construct a handle transfering ownership from another handle. + Handle(Handle&& rhs) noexcept : handle{std::exchange(rhs.handle, nullptr)}, dld{rhs.dld} {} + + /// Assign the current handle transfering the ownership from another handle. + /// Destroys any previously held object. + Handle& operator=(Handle&& rhs) noexcept { + Release(); + handle = std::exchange(rhs.handle, nullptr); + dld = rhs.dld; + return *this; + } + + /// Destroys the current handle if it existed. + ~Handle() noexcept { + Release(); + } + + /// Destroys any held object. + void reset() noexcept { + Release(); + handle = nullptr; + } + + /// Returns the address of the held object. + /// Intended for Vulkan structures that expect a pointer to an array. + const Type* address() const noexcept { + return std::addressof(handle); + } + + /// Returns the held Vulkan handle. + Type operator*() const noexcept { + return handle; + } + + /// Returns true when there's a held object. + operator bool() const noexcept { + return handle != nullptr; + } + +protected: + Type handle = nullptr; + const Dispatch* dld = nullptr; + +private: + /// Destroys the held object if it exists. + void Release() noexcept { + if (handle) { + Destroy(handle, *dld); + } + } +}; + +/// Array of a pool allocation. +/// Analogue to std::vector +template <typename AllocationType, typename PoolType> +class PoolAllocations { +public: + /// Construct an empty allocation. + PoolAllocations() = default; + + /// Construct an allocation. Errors are reported through IsOutOfPoolMemory(). + explicit PoolAllocations(std::unique_ptr<AllocationType[]> allocations, std::size_t num, + VkDevice device, PoolType pool, const DeviceDispatch& dld) noexcept + : allocations{std::move(allocations)}, num{num}, device{device}, pool{pool}, dld{&dld} {} + + /// Copying Vulkan allocations is not supported and will never be. + PoolAllocations(const PoolAllocations&) = delete; + PoolAllocations& operator=(const PoolAllocations&) = delete; + + /// Construct an allocation transfering ownership from another allocation. + PoolAllocations(PoolAllocations&& rhs) noexcept + : allocations{std::move(rhs.allocations)}, num{rhs.num}, device{rhs.device}, pool{rhs.pool}, + dld{rhs.dld} {} + + /// Assign an allocation transfering ownership from another allocation. + /// Releases any previously held allocation. + PoolAllocations& operator=(PoolAllocations&& rhs) noexcept { + Release(); + allocations = std::move(rhs.allocations); + num = rhs.num; + device = rhs.device; + pool = rhs.pool; + dld = rhs.dld; + return *this; + } + + /// Destroys any held allocation. + ~PoolAllocations() { + Release(); + } + + /// Returns the number of allocations. + std::size_t size() const noexcept { + return num; + } + + /// Returns a pointer to the array of allocations. + AllocationType const* data() const noexcept { + return allocations.get(); + } + + /// Returns the allocation in the specified index. + /// @pre index < size() + AllocationType operator[](std::size_t index) const noexcept { + return allocations[index]; + } + + /// True when a pool fails to construct. + bool IsOutOfPoolMemory() const noexcept { + return !device; + } + +private: + /// Destroys the held allocations if they exist. + void Release() noexcept { + if (!allocations) { + return; + } + const Span<AllocationType> span(allocations.get(), num); + const VkResult result = Free(device, pool, span, *dld); + // There's no way to report errors from a destructor. + if (result != VK_SUCCESS) { + std::terminate(); + } + } + + std::unique_ptr<AllocationType[]> allocations; + std::size_t num = 0; + VkDevice device = nullptr; + PoolType pool = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +using BufferView = Handle<VkBufferView, VkDevice, DeviceDispatch>; +using DebugCallback = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; +using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; +using DescriptorUpdateTemplateKHR = Handle<VkDescriptorUpdateTemplateKHR, VkDevice, DeviceDispatch>; +using Framebuffer = Handle<VkFramebuffer, VkDevice, DeviceDispatch>; +using ImageView = Handle<VkImageView, VkDevice, DeviceDispatch>; +using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; +using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; +using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; +using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; +using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; +using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>; +using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>; +using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; + +using DescriptorSets = PoolAllocations<VkDescriptorSet, VkDescriptorPool>; +using CommandBuffers = PoolAllocations<VkCommandBuffer, VkCommandPool>; + +/// Vulkan instance owning handle. +class Instance : public Handle<VkInstance, NoOwner, InstanceDispatch> { + using Handle<VkInstance, NoOwner, InstanceDispatch>::Handle; + +public: + /// Creates a Vulkan instance. Use "operator bool" for error handling. + static Instance Create(Span<const char*> layers, Span<const char*> extensions, + InstanceDispatch& dld) noexcept; + + /// Enumerates physical devices. + /// @return Physical devices and an empty handle on failure. + std::optional<std::vector<VkPhysicalDevice>> EnumeratePhysicalDevices(); + + /// Tries to create a debug callback messenger. Returns an empty handle on failure. + DebugCallback TryCreateDebugCallback(PFN_vkDebugUtilsMessengerCallbackEXT callback) noexcept; +}; + +class Queue { +public: + /// Construct an empty queue handle. + constexpr Queue() noexcept = default; + + /// Construct a queue handle. + constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} + + /// Returns the checkpoint data. + /// @note Returns an empty vector when the function pointer is not present. + std::vector<VkCheckpointDataNV> GetCheckpointDataNV(const DeviceDispatch& dld) const; + + void Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const { + Check(dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence)); + } + + VkResult Present(const VkPresentInfoKHR& present_info) const noexcept { + return dld->vkQueuePresentKHR(queue, &present_info); + } + +private: + VkQueue queue = nullptr; + const DeviceDispatch* dld = nullptr; +}; + +class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { + using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle; + +public: + /// Attaches a memory allocation. + void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; +}; + +class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { + using Handle<VkImage, VkDevice, DeviceDispatch>::Handle; + +public: + /// Attaches a memory allocation. + void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; +}; + +class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { + using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; + +public: + u8* Map(VkDeviceSize offset, VkDeviceSize size) const { + void* data; + Check(dld->vkMapMemory(owner, handle, offset, size, 0, &data)); + return static_cast<u8*>(data); + } + + void Unmap() const noexcept { + dld->vkUnmapMemory(owner, handle); + } +}; + +class Fence : public Handle<VkFence, VkDevice, DeviceDispatch> { + using Handle<VkFence, VkDevice, DeviceDispatch>::Handle; + +public: + VkResult Wait(u64 timeout = std::numeric_limits<u64>::max()) const noexcept { + return dld->vkWaitForFences(owner, 1, &handle, true, timeout); + } + + VkResult GetStatus() const noexcept { + return dld->vkGetFenceStatus(owner, handle); + } + + void Reset() const { + Check(dld->vkResetFences(owner, 1, &handle)); + } +}; + +class DescriptorPool : public Handle<VkDescriptorPool, VkDevice, DeviceDispatch> { + using Handle<VkDescriptorPool, VkDevice, DeviceDispatch>::Handle; + +public: + DescriptorSets Allocate(const VkDescriptorSetAllocateInfo& ai) const; +}; + +class CommandPool : public Handle<VkCommandPool, VkDevice, DeviceDispatch> { + using Handle<VkCommandPool, VkDevice, DeviceDispatch>::Handle; + +public: + CommandBuffers Allocate(std::size_t num_buffers, + VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) const; +}; + +class SwapchainKHR : public Handle<VkSwapchainKHR, VkDevice, DeviceDispatch> { + using Handle<VkSwapchainKHR, VkDevice, DeviceDispatch>::Handle; + +public: + std::vector<VkImage> GetImages() const; +}; + +class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { + using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; + +public: + static Device Create(VkPhysicalDevice physical_device, Span<VkDeviceQueueCreateInfo> queues_ci, + Span<const char*> enabled_extensions, + const VkPhysicalDeviceFeatures2& enabled_features, + DeviceDispatch& dld) noexcept; + + Queue GetQueue(u32 family_index) const noexcept; + + Buffer CreateBuffer(const VkBufferCreateInfo& ci) const; + + BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; + + Image CreateImage(const VkImageCreateInfo& ci) const; + + ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; + + Semaphore CreateSemaphore() const; + + Fence CreateFence(const VkFenceCreateInfo& ci) const; + + DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const; + + RenderPass CreateRenderPass(const VkRenderPassCreateInfo& ci) const; + + DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; + + PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; + + Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; + + Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; + + Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; + + Framebuffer CreateFramebuffer(const VkFramebufferCreateInfo& ci) const; + + CommandPool CreateCommandPool(const VkCommandPoolCreateInfo& ci) const; + + DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplateKHR( + const VkDescriptorUpdateTemplateCreateInfoKHR& ci) const; + + QueryPool CreateQueryPool(const VkQueryPoolCreateInfo& ci) const; + + ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; + + SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; + + DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; + + DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; + + VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; + + VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; + + void UpdateDescriptorSets(Span<VkWriteDescriptorSet> writes, + Span<VkCopyDescriptorSet> copies) const noexcept; + + void UpdateDescriptorSet(VkDescriptorSet set, VkDescriptorUpdateTemplateKHR update_template, + const void* data) const noexcept { + dld->vkUpdateDescriptorSetWithTemplateKHR(handle, set, update_template, data); + } + + VkResult AcquireNextImageKHR(VkSwapchainKHR swapchain, u64 timeout, VkSemaphore semaphore, + VkFence fence, u32* image_index) const noexcept { + return dld->vkAcquireNextImageKHR(handle, swapchain, timeout, semaphore, fence, + image_index); + } + + VkResult WaitIdle() const noexcept { + return dld->vkDeviceWaitIdle(handle); + } + + void ResetQueryPoolEXT(VkQueryPool query_pool, u32 first, u32 count) const noexcept { + dld->vkResetQueryPoolEXT(handle, query_pool, first, count); + } + + void GetQueryResults(VkQueryPool query_pool, u32 first, u32 count, std::size_t data_size, + void* data, VkDeviceSize stride, VkQueryResultFlags flags) const { + Check(dld->vkGetQueryPoolResults(handle, query_pool, first, count, data_size, data, stride, + flags)); + } + + template <typename T> + T GetQueryResult(VkQueryPool query_pool, u32 first, VkQueryResultFlags flags) const { + static_assert(std::is_trivially_copyable_v<T>); + T value; + GetQueryResults(query_pool, first, 1, sizeof(T), &value, sizeof(T), flags); + return value; + } +}; + +class PhysicalDevice { +public: + constexpr PhysicalDevice() noexcept = default; + + constexpr PhysicalDevice(VkPhysicalDevice physical_device, const InstanceDispatch& dld) noexcept + : physical_device{physical_device}, dld{&dld} {} + + constexpr operator VkPhysicalDevice() const noexcept { + return physical_device; + } + + VkPhysicalDeviceProperties GetProperties() const noexcept; + + void GetProperties2KHR(VkPhysicalDeviceProperties2KHR&) const noexcept; + + VkPhysicalDeviceFeatures GetFeatures() const noexcept; + + void GetFeatures2KHR(VkPhysicalDeviceFeatures2KHR&) const noexcept; + + VkFormatProperties GetFormatProperties(VkFormat) const noexcept; + + std::vector<VkExtensionProperties> EnumerateDeviceExtensionProperties() const; + + std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const; + + bool GetSurfaceSupportKHR(u32 queue_family_index, VkSurfaceKHR) const; + + VkSurfaceCapabilitiesKHR GetSurfaceCapabilitiesKHR(VkSurfaceKHR) const noexcept; + + std::vector<VkSurfaceFormatKHR> GetSurfaceFormatsKHR(VkSurfaceKHR) const; + + std::vector<VkPresentModeKHR> GetSurfacePresentModesKHR(VkSurfaceKHR) const; + + VkPhysicalDeviceMemoryProperties GetMemoryProperties() const noexcept; + +private: + VkPhysicalDevice physical_device = nullptr; + const InstanceDispatch* dld = nullptr; +}; + +class CommandBuffer { +public: + CommandBuffer() noexcept = default; + + explicit CommandBuffer(VkCommandBuffer handle, const DeviceDispatch& dld) noexcept + : handle{handle}, dld{&dld} {} + + const VkCommandBuffer* address() const noexcept { + return &handle; + } + + void Begin(const VkCommandBufferBeginInfo& begin_info) const { + Check(dld->vkBeginCommandBuffer(handle, &begin_info)); + } + + void End() const { + Check(dld->vkEndCommandBuffer(handle)); + } + + void BeginRenderPass(const VkRenderPassBeginInfo& renderpass_bi, + VkSubpassContents contents) const noexcept { + dld->vkCmdBeginRenderPass(handle, &renderpass_bi, contents); + } + + void EndRenderPass() const noexcept { + dld->vkCmdEndRenderPass(handle); + } + + void BeginQuery(VkQueryPool query_pool, u32 query, VkQueryControlFlags flags) const noexcept { + dld->vkCmdBeginQuery(handle, query_pool, query, flags); + } + + void EndQuery(VkQueryPool query_pool, u32 query) const noexcept { + dld->vkCmdEndQuery(handle, query_pool, query); + } + + void BindDescriptorSets(VkPipelineBindPoint bind_point, VkPipelineLayout layout, u32 first, + Span<VkDescriptorSet> sets, Span<u32> dynamic_offsets) const noexcept { + dld->vkCmdBindDescriptorSets(handle, bind_point, layout, first, sets.size(), sets.data(), + dynamic_offsets.size(), dynamic_offsets.data()); + } + + void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { + dld->vkCmdBindPipeline(handle, bind_point, pipeline); + } + + void BindIndexBuffer(VkBuffer buffer, VkDeviceSize offset, VkIndexType index_type) const + noexcept { + dld->vkCmdBindIndexBuffer(handle, buffer, offset, index_type); + } + + void BindVertexBuffers(u32 first, u32 count, const VkBuffer* buffers, + const VkDeviceSize* offsets) const noexcept { + dld->vkCmdBindVertexBuffers(handle, first, count, buffers, offsets); + } + + void BindVertexBuffer(u32 binding, VkBuffer buffer, VkDeviceSize offset) const noexcept { + BindVertexBuffers(binding, 1, &buffer, &offset); + } + + void Draw(u32 vertex_count, u32 instance_count, u32 first_vertex, u32 first_instance) const + noexcept { + dld->vkCmdDraw(handle, vertex_count, instance_count, first_vertex, first_instance); + } + + void DrawIndexed(u32 index_count, u32 instance_count, u32 first_index, u32 vertex_offset, + u32 first_instance) const noexcept { + dld->vkCmdDrawIndexed(handle, index_count, instance_count, first_index, vertex_offset, + first_instance); + } + + void ClearAttachments(Span<VkClearAttachment> attachments, Span<VkClearRect> rects) const + noexcept { + dld->vkCmdClearAttachments(handle, attachments.size(), attachments.data(), rects.size(), + rects.data()); + } + + void BlitImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span<VkImageBlit> regions, VkFilter filter) const + noexcept { + dld->vkCmdBlitImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data(), filter); + } + + void Dispatch(u32 x, u32 y, u32 z) const noexcept { + dld->vkCmdDispatch(handle, x, y, z); + } + + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, + VkDependencyFlags dependency_flags, Span<VkMemoryBarrier> memory_barriers, + Span<VkBufferMemoryBarrier> buffer_barriers, + Span<VkImageMemoryBarrier> image_barriers) const noexcept { + dld->vkCmdPipelineBarrier(handle, src_stage_mask, dst_stage_mask, dependency_flags, + memory_barriers.size(), memory_barriers.data(), + buffer_barriers.size(), buffer_barriers.data(), + image_barriers.size(), image_barriers.data()); + } + + void CopyBufferToImage(VkBuffer src_buffer, VkImage dst_image, VkImageLayout dst_image_layout, + Span<VkBufferImageCopy> regions) const noexcept { + dld->vkCmdCopyBufferToImage(handle, src_buffer, dst_image, dst_image_layout, regions.size(), + regions.data()); + } + + void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, Span<VkBufferCopy> regions) const + noexcept { + dld->vkCmdCopyBuffer(handle, src_buffer, dst_buffer, regions.size(), regions.data()); + } + + void CopyImage(VkImage src_image, VkImageLayout src_layout, VkImage dst_image, + VkImageLayout dst_layout, Span<VkImageCopy> regions) const noexcept { + dld->vkCmdCopyImage(handle, src_image, src_layout, dst_image, dst_layout, regions.size(), + regions.data()); + } + + void CopyImageToBuffer(VkImage src_image, VkImageLayout src_layout, VkBuffer dst_buffer, + Span<VkBufferImageCopy> regions) const noexcept { + dld->vkCmdCopyImageToBuffer(handle, src_image, src_layout, dst_buffer, regions.size(), + regions.data()); + } + + void FillBuffer(VkBuffer dst_buffer, VkDeviceSize dst_offset, VkDeviceSize size, u32 data) const + noexcept { + dld->vkCmdFillBuffer(handle, dst_buffer, dst_offset, size, data); + } + + void PushConstants(VkPipelineLayout layout, VkShaderStageFlags flags, u32 offset, u32 size, + const void* values) const noexcept { + dld->vkCmdPushConstants(handle, layout, flags, offset, size, values); + } + + void SetCheckpointNV(const void* checkpoint_marker) const noexcept { + dld->vkCmdSetCheckpointNV(handle, checkpoint_marker); + } + + void SetViewport(u32 first, Span<VkViewport> viewports) const noexcept { + dld->vkCmdSetViewport(handle, first, viewports.size(), viewports.data()); + } + + void SetScissor(u32 first, Span<VkRect2D> scissors) const noexcept { + dld->vkCmdSetScissor(handle, first, scissors.size(), scissors.data()); + } + + void SetBlendConstants(const float blend_constants[4]) const noexcept { + dld->vkCmdSetBlendConstants(handle, blend_constants); + } + + void SetStencilCompareMask(VkStencilFaceFlags face_mask, u32 compare_mask) const noexcept { + dld->vkCmdSetStencilCompareMask(handle, face_mask, compare_mask); + } + + void SetStencilReference(VkStencilFaceFlags face_mask, u32 reference) const noexcept { + dld->vkCmdSetStencilReference(handle, face_mask, reference); + } + + void SetStencilWriteMask(VkStencilFaceFlags face_mask, u32 write_mask) const noexcept { + dld->vkCmdSetStencilWriteMask(handle, face_mask, write_mask); + } + + void SetDepthBias(float constant_factor, float clamp, float slope_factor) const noexcept { + dld->vkCmdSetDepthBias(handle, constant_factor, clamp, slope_factor); + } + + void SetDepthBounds(float min_depth_bounds, float max_depth_bounds) const noexcept { + dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); + } + + void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, + const VkDeviceSize* offsets, + const VkDeviceSize* sizes) const noexcept { + dld->vkCmdBindTransformFeedbackBuffersEXT(handle, first, count, buffers, offsets, sizes); + } + + void BeginTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, + const VkBuffer* counter_buffers, + const VkDeviceSize* counter_buffer_offsets) const noexcept { + dld->vkCmdBeginTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } + + void EndTransformFeedbackEXT(u32 first_counter_buffer, u32 counter_buffers_count, + const VkBuffer* counter_buffers, + const VkDeviceSize* counter_buffer_offsets) const noexcept { + dld->vkCmdEndTransformFeedbackEXT(handle, first_counter_buffer, counter_buffers_count, + counter_buffers, counter_buffer_offsets); + } + +private: + VkCommandBuffer handle; + const DeviceDispatch* dld; +}; + +std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( + const InstanceDispatch& dld); + +} // namespace Vulkan::vk diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index 2fe787d6f..0f4c3103a 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -235,34 +235,30 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { case OpCode::Id::LEA_IMM: case OpCode::Id::LEA_RZ: case OpCode::Id::LEA_HI: { - const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { + auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::LEA_R2: { return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; } - case OpCode::Id::LEA_R1: { const bool neg = instr.lea.r1.neg != 0; return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), GetRegister(instr.gpr20), Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; } - case OpCode::Id::LEA_IMM: { const bool neg = instr.lea.imm.neg != 0; return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; } - case OpCode::Id::LEA_RZ: { const bool neg = instr.lea.rz.neg != 0; return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; } - case OpCode::Id::LEA_HI: default: UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); @@ -275,12 +271,9 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), "Unhandled LEA Predicate"); - const Node shifted_c = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); - const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); - const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); - - SetRegister(bb, instr.gpr0, value); + Node value = Operation(OperationCode::ILogicalShiftLeft, std::move(op_a), std::move(op_c)); + value = Operation(OperationCode::IAdd, std::move(op_b), std::move(value)); + SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp index 6ead42070..c72690b2b 100644 --- a/src/video_core/shader/decode/conversion.cpp +++ b/src/video_core/shader/decode/conversion.cpp @@ -138,18 +138,23 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); - value = [&]() { + value = [&] { + if (instr.conversion.src_size != instr.conversion.dst_size) { + // Rounding operations only matter when the source and destination conversion size + // is the same. + return value; + } switch (instr.conversion.f2f.GetRoundingMode()) { case Tegra::Shader::F2fRoundingOp::None: return value; case Tegra::Shader::F2fRoundingOp::Round: - return Operation(OperationCode::FRoundEven, PRECISE, value); + return Operation(OperationCode::FRoundEven, value); case Tegra::Shader::F2fRoundingOp::Floor: - return Operation(OperationCode::FFloor, PRECISE, value); + return Operation(OperationCode::FFloor, value); case Tegra::Shader::F2fRoundingOp::Ceil: - return Operation(OperationCode::FCeil, PRECISE, value); + return Operation(OperationCode::FCeil, value); case Tegra::Shader::F2fRoundingOp::Trunc: - return Operation(OperationCode::FTrunc, PRECISE, value); + return Operation(OperationCode::FTrunc, value); default: UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", static_cast<u32>(instr.conversion.f2f.rounding.Value())); diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d2fe4ec5d..0dd7a1196 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -13,13 +13,247 @@ #include "video_core/engines/shader_bytecode.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" +#include "video_core/textures/texture.h" namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; +using Tegra::Shader::StoreType; +using Tegra::Texture::ComponentType; +using Tegra::Texture::TextureFormat; +using Tegra::Texture::TICEntry; namespace { + +ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, + std::size_t component) { + const TextureFormat format{descriptor.format}; + switch (format) { + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.b_type; + } + if (component == 3) { + return descriptor.a_type; + } + break; + case TextureFormat::A8R8G8B8: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.r_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.b_type; + } + break; + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + if (component == 0) { + return descriptor.a_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + if (component == 3) { + return descriptor.r_type; + } + break; + case TextureFormat::R32_B24G8: + if (component == 0) { + return descriptor.r_type; + } + if (component == 1) { + return descriptor.b_type; + } + if (component == 2) { + return descriptor.g_type; + } + break; + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + if (component == 0) { + return descriptor.b_type; + } + if (component == 1) { + return descriptor.g_type; + } + if (component == 2) { + return descriptor.r_type; + } + break; + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + if (component == 0) { + return descriptor.g_type; + } + if (component == 1) { + return descriptor.r_type; + } + break; + } + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return ComponentType::FLOAT; +} + +bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + constexpr std::array<u8, 16> mask = { + 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), + (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; + return std::bitset<4>{mask.at(component_mask)}.test(component); +} + +u32 GetComponentSize(TextureFormat format, std::size_t component) { + switch (format) { + case TextureFormat::R32_G32_B32_A32: + return 32; + case TextureFormat::R16_G16_B16_A16: + return 16; + case TextureFormat::R32_G32_B32: + return component <= 2 ? 32 : 0; + case TextureFormat::R32_G32: + return component <= 1 ? 32 : 0; + case TextureFormat::R16_G16: + return component <= 1 ? 16 : 0; + case TextureFormat::R32: + return component == 0 ? 32 : 0; + case TextureFormat::R16: + return component == 0 ? 16 : 0; + case TextureFormat::R8: + return component == 0 ? 8 : 0; + case TextureFormat::R1: + return component == 0 ? 1 : 0; + case TextureFormat::A8R8G8B8: + return 8; + case TextureFormat::A2B10G10R10: + return (component == 3 || component == 2 || component == 1) ? 10 : 2; + case TextureFormat::A4B4G4R4: + return 4; + case TextureFormat::A5B5G5R1: + return (component == 0 || component == 1 || component == 2) ? 5 : 1; + case TextureFormat::A1B5G5R5: + return (component == 1 || component == 2 || component == 3) ? 5 : 1; + case TextureFormat::R32_B24G8: + if (component == 0) { + return 32; + } + if (component == 1) { + return 24; + } + if (component == 2) { + return 8; + } + return 0; + case TextureFormat::B5G6R5: + if (component == 0 || component == 2) { + return 5; + } + if (component == 1) { + return 6; + } + return 0; + case TextureFormat::B6G5R5: + if (component == 1 || component == 2) { + return 5; + } + if (component == 0) { + return 6; + } + return 0; + case TextureFormat::G8R24: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G24R8: + if (component == 0) { + return 8; + } + if (component == 1) { + return 24; + } + return 0; + case TextureFormat::G8R8: + return (component == 0 || component == 1) ? 8 : 0; + case TextureFormat::G4R4: + return (component == 0 || component == 1) ? 4 : 0; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return 0; + } +} + +std::size_t GetImageComponentMask(TextureFormat format) { + constexpr u8 R = 0b0001; + constexpr u8 G = 0b0010; + constexpr u8 B = 0b0100; + constexpr u8 A = 0b1000; + switch (format) { + case TextureFormat::R32_G32_B32_A32: + case TextureFormat::R16_G16_B16_A16: + case TextureFormat::A8R8G8B8: + case TextureFormat::A2B10G10R10: + case TextureFormat::A4B4G4R4: + case TextureFormat::A5B5G5R1: + case TextureFormat::A1B5G5R5: + return std::size_t{R | G | B | A}; + case TextureFormat::R32_G32_B32: + case TextureFormat::R32_B24G8: + case TextureFormat::B5G6R5: + case TextureFormat::B6G5R5: + return std::size_t{R | G | B}; + case TextureFormat::R32_G32: + case TextureFormat::R16_G16: + case TextureFormat::G8R24: + case TextureFormat::G24R8: + case TextureFormat::G8R8: + case TextureFormat::G4R4: + return std::size_t{R | G}; + case TextureFormat::R32: + case TextureFormat::R16: + case TextureFormat::R8: + case TextureFormat::R1: + return std::size_t{R}; + default: + UNIMPLEMENTED_MSG("texture format not implement={}", format); + return std::size_t{R | G | B | A}; + } +} + std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { switch (image_type) { case Tegra::Shader::ImageType::Texture1D: @@ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { } } // Anonymous namespace +std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, + Node original_value) { + switch (component_type) { + case ComponentType::SNORM: { + // range [-1.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); + cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); + return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; + } + case ComponentType::SINT: + case ComponentType::UNORM: { + bool is_signed = component_type == ComponentType::SINT; + // range [0.0, 1.0] + auto cnv_value = Operation(OperationCode::FMul, original_value, + Immediate(static_cast<float>(1 << component_size) - 1.f)); + return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), + is_signed}; + } + case ComponentType::UINT: // range [0, (1 << component_size) - 1] + return {std::move(original_value), false}; + case ComponentType::FLOAT: + if (component_size == 16) { + return {Operation(OperationCode::HCastFloat, original_value), true}; + } else { + return {std::move(original_value), true}; + } + default: + UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); + return {std::move(original_value), true}; + } +} + u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::SULD: { - UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); @@ -62,17 +328,89 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { : GetBindlessImage(instr.gpr39, type)}; image.MarkRead(); - u32 indexer = 0; - for (u32 element = 0; element < 4; ++element) { - if (!instr.suldst.IsComponentEnabled(element)) { - continue; + if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.suldst.IsComponentEnabled(element)) { + continue; + } + MetaImage meta{image, {}, element}; + Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); + SetTemporary(bb, indexer++, std::move(value)); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { + UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && + instr.suldst.GetStoreDataLayout() != StoreType::Bits64); + + auto descriptor = [this, instr] { + std::optional<Tegra::Engines::SamplerDescriptor> descriptor; + if (instr.suldst.is_immediate) { + descriptor = + registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); + } else { + const Node image_register = GetRegister(instr.gpr39); + const auto [base_image, buffer, offset] = TrackCbuf( + image_register, global_code, static_cast<s64>(global_code.size())); + descriptor = registry.ObtainBindlessSampler(buffer, offset); + } + if (!descriptor) { + UNREACHABLE_MSG("Failed to obtain image descriptor"); + } + return *descriptor; + }(); + + const auto comp_mask = GetImageComponentMask(descriptor.format); + + switch (instr.suldst.GetStoreDataLayout()) { + case StoreType::Bits32: + case StoreType::Bits64: { + u32 indexer = 0; + u32 shifted_counter = 0; + Node value = Immediate(0); + for (u32 element = 0; element < 4; ++element) { + if (!IsComponentEnabled(comp_mask, element)) { + continue; + } + const auto component_type = GetComponentType(descriptor, element); + const auto component_size = GetComponentSize(descriptor.format, element); + MetaImage meta{image, {}, element}; + + auto [converted_value, is_signed] = GetComponentValue( + component_type, component_size, + Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); + + // shift element to correct position + const auto shifted = shifted_counter; + if (shifted > 0) { + converted_value = + SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, + std::move(converted_value), Immediate(shifted)); + } + shifted_counter += component_size; + + // add value into result + value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); + + // if we shifted enough for 1 byte -> we save it into temp + if (shifted_counter >= 32) { + SetTemporary(bb, indexer++, std::move(value)); + // reset counter and value to prepare pack next byte + value = Immediate(0); + shifted_counter = 0; + } + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } + default: + UNREACHABLE(); + break; } - MetaImage meta{image, {}, element}; - Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); - SetTemporary(bb, indexer++, std::move(value)); - } - for (u32 i = 0; i < indexer; ++i) { - SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); } break; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index b5fbc4d58..b8f63922f 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -19,7 +19,6 @@ namespace VideoCommon::Shader { using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; -using Tegra::Shader::GlobalAtomicOp; using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; @@ -28,6 +27,31 @@ using Tegra::Shader::StoreType; namespace { +Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { + const OperationCode operation_code = [op] { + switch (op) { + case AtomicOp::Add: + return OperationCode::AtomicIAdd; + case AtomicOp::Min: + return OperationCode::AtomicIMin; + case AtomicOp::Max: + return OperationCode::AtomicIMax; + case AtomicOp::And: + return OperationCode::AtomicIAnd; + case AtomicOp::Or: + return OperationCode::AtomicIOr; + case AtomicOp::Xor: + return OperationCode::AtomicIXor; + case AtomicOp::Exch: + return OperationCode::AtomicIExchange; + default: + UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); + return OperationCode::AtomicIAdd; + } + }(); + return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); +} + bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { return uniform_type == Tegra::Shader::UniformType::UnsignedByte || uniform_type == Tegra::Shader::UniformType::UnsignedShort; @@ -363,10 +387,13 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::ATOM: { - UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", - static_cast<int>(instr.atom.operation.Value())); - UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", - static_cast<int>(instr.atom.type.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || + instr.atom.operation == AtomicOp::Dec || + instr.atom.operation == AtomicOp::SafeAdd, + "operation={}", static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || + instr.atom.type == GlobalAtomicType::U64, + "type={}", static_cast<int>(instr.atom.type.Value())); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); @@ -375,25 +402,29 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } + const bool is_signed = + instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, + GetRegister(instr.gpr20)); SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::ATOMS: { - UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", - static_cast<int>(instr.atoms.operation.Value())); - UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", - static_cast<int>(instr.atoms.type.Value())); - + UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || + instr.atoms.operation == AtomicOp::Dec, + "operation={}", static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || + instr.atoms.type == AtomicType::U64, + "type={}", static_cast<int>(instr.atoms.type.Value())); + const bool is_signed = + instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; const s32 offset = instr.atoms.GetImmediateOffset(); Node address = GetRegister(instr.gpr8); address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); - - Node memory = GetSharedMemory(std::move(address)); - Node data = GetRegister(instr.gpr20); - - Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); + Node value = + GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, + GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); SetRegister(bb, instr.gpr0, std::move(value)); break; } diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index 4944e9d69..d4f95b18c 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp @@ -11,12 +11,17 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::ConditionCode; using Tegra::Shader::Instruction; +using Tegra::Shader::IpaInterpMode; using Tegra::Shader::OpCode; +using Tegra::Shader::PixelImap; using Tegra::Shader::Register; using Tegra::Shader::SystemVariable; +using Index = Tegra::Shader::Attribute::Index; + u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); @@ -66,18 +71,24 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { bb.push_back(Operation(OperationCode::Discard)); break; } - case OpCode::Id::MOV_SYS: { + case OpCode::Id::S2R: { const Node value = [this, instr] { switch (instr.sys20) { case SystemVariable::LaneId: - LOG_WARNING(HW_GPU, "MOV_SYS instruction with LaneId is incomplete"); + LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete"); return Immediate(0U); case SystemVariable::InvocationId: return Operation(OperationCode::InvocationId); case SystemVariable::Ydirection: return Operation(OperationCode::YNegate); case SystemVariable::InvocationInfo: - LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete"); + LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); + return Immediate(0U); + case SystemVariable::WscaleFactorXY: + UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); + return Immediate(0U); + case SystemVariable::WscaleFactorZ: + UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); return Immediate(0U); case SystemVariable::Tid: { Node value = Immediate(0); @@ -213,27 +224,28 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { } case OpCode::Id::IPA: { const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; - const auto attribute = instr.attribute.fmt28; - const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(), - instr.ipa.sample_mode.Value()}; + const Index index = attribute.index; Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) - : GetInputAttribute(attribute.index, attribute.element); - const Tegra::Shader::Attribute::Index index = attribute.index.Value(); - const bool is_generic = index >= Tegra::Shader::Attribute::Index::Attribute_0 && - index <= Tegra::Shader::Attribute::Index::Attribute_31; - if (is_generic || is_physical) { - // TODO(Blinkhawk): There are cases where a perspective attribute use PASS. - // In theory by setting them as perspective, OpenGL does the perspective correction. - // A way must figured to reverse the last step of it. - if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) { - value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20)); + : GetInputAttribute(index, attribute.element); + + // Code taken from Ryujinx. + if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { + const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); + if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { + Node position_w = GetInputAttribute(Index::Position, 3); + value = Operation(OperationCode::FMul, move(value), move(position_w)); } } - value = GetSaturatedFloat(value, instr.ipa.saturate); - SetRegister(bb, instr.gpr0, value); + if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { + value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); + } + + value = GetSaturatedFloat(move(value), instr.ipa.saturate); + + SetRegister(bb, instr.gpr0, move(value)); break; } case OpCode::Id::OUT_R: { diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index a1828546e..5fcc9da60 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,7 +162,21 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void - AtomicAdd, /// (memory, {u}int) -> {u}int + AtomicUExchange, /// (memory, uint) -> uint + AtomicUAdd, /// (memory, uint) -> uint + AtomicUMin, /// (memory, uint) -> uint + AtomicUMax, /// (memory, uint) -> uint + AtomicUAnd, /// (memory, uint) -> uint + AtomicUOr, /// (memory, uint) -> uint + AtomicUXor, /// (memory, uint) -> uint + + AtomicIExchange, /// (memory, int) -> int + AtomicIAdd, /// (memory, int) -> int + AtomicIMin, /// (memory, int) -> int + AtomicIMax, /// (memory, int) -> int + AtomicIAnd, /// (memory, int) -> int + AtomicIOr, /// (memory, int) -> int + AtomicIXor, /// (memory, int) -> int Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp index 76c56abb5..7bf4ff387 100644 --- a/src/video_core/shader/node_helper.cpp +++ b/src/video_core/shader/node_helper.cpp @@ -86,6 +86,20 @@ OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) return OperationCode::LogicalUNotEqual; case OperationCode::LogicalIGreaterEqual: return OperationCode::LogicalUGreaterEqual; + case OperationCode::AtomicIExchange: + return OperationCode::AtomicUExchange; + case OperationCode::AtomicIAdd: + return OperationCode::AtomicUAdd; + case OperationCode::AtomicIMin: + return OperationCode::AtomicUMin; + case OperationCode::AtomicIMax: + return OperationCode::AtomicUMax; + case OperationCode::AtomicIAnd: + return OperationCode::AtomicUAnd; + case OperationCode::AtomicIOr: + return OperationCode::AtomicUOr; + case OperationCode::AtomicIXor: + return OperationCode::AtomicUXor; case OperationCode::INegate: UNREACHABLE_MSG("Can't negate an unsigned integer"); return {}; diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index baf7188d2..8852c8a1b 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -359,6 +359,9 @@ Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const { switch (cc) { case Tegra::Shader::ConditionCode::NEU: return GetInternalFlag(InternalFlag::Zero, true); + case Tegra::Shader::ConditionCode::FCSM_TR: + UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); + return MakeNode<PredicateNode>(Pred::NeverExecute, false); default: UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc)); return MakeNode<PredicateNode>(Pred::NeverExecute, false); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0f1ebef1b..c6e7bdf50 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -312,6 +312,10 @@ private: /// Conditionally saturates a half float pair Node GetSaturatedHalfFloat(Node value, bool saturate = true); + /// Get image component value by type and size + std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, + u32 component_size, Node original_value); + /// Returns a predicate comparing two floats Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); /// Returns a predicate comparing two integers diff --git a/src/video_core/surface.h b/src/video_core/surface.h index ae8817465..e0acd44d3 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -504,103 +504,6 @@ static constexpr u32 GetBytesPerPixel(PixelFormat pixel_format) { return GetFormatBpp(pixel_format) / CHAR_BIT; } -enum class SurfaceCompression { - None, // Not compressed - Compressed, // Texture is compressed - Converted, // Texture is converted before upload or after download - Rearranged, // Texture is swizzled before upload or after download -}; - -constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table = {{ - SurfaceCompression::None, // ABGR8U - SurfaceCompression::None, // ABGR8S - SurfaceCompression::None, // ABGR8UI - SurfaceCompression::None, // B5G6R5U - SurfaceCompression::None, // A2B10G10R10U - SurfaceCompression::None, // A1B5G5R5U - SurfaceCompression::None, // R8U - SurfaceCompression::None, // R8UI - SurfaceCompression::None, // RGBA16F - SurfaceCompression::None, // RGBA16U - SurfaceCompression::None, // RGBA16S - SurfaceCompression::None, // RGBA16UI - SurfaceCompression::None, // R11FG11FB10F - SurfaceCompression::None, // RGBA32UI - SurfaceCompression::Compressed, // DXT1 - SurfaceCompression::Compressed, // DXT23 - SurfaceCompression::Compressed, // DXT45 - SurfaceCompression::Compressed, // DXN1 - SurfaceCompression::Compressed, // DXN2UNORM - SurfaceCompression::Compressed, // DXN2SNORM - SurfaceCompression::Compressed, // BC7U - SurfaceCompression::Compressed, // BC6H_UF16 - SurfaceCompression::Compressed, // BC6H_SF16 - SurfaceCompression::Converted, // ASTC_2D_4X4 - SurfaceCompression::None, // BGRA8 - SurfaceCompression::None, // RGBA32F - SurfaceCompression::None, // RG32F - SurfaceCompression::None, // R32F - SurfaceCompression::None, // R16F - SurfaceCompression::None, // R16U - SurfaceCompression::None, // R16S - SurfaceCompression::None, // R16UI - SurfaceCompression::None, // R16I - SurfaceCompression::None, // RG16 - SurfaceCompression::None, // RG16F - SurfaceCompression::None, // RG16UI - SurfaceCompression::None, // RG16I - SurfaceCompression::None, // RG16S - SurfaceCompression::None, // RGB32F - SurfaceCompression::None, // RGBA8_SRGB - SurfaceCompression::None, // RG8U - SurfaceCompression::None, // RG8S - SurfaceCompression::None, // RG32UI - SurfaceCompression::None, // RGBX16F - SurfaceCompression::None, // R32UI - SurfaceCompression::None, // R32I - SurfaceCompression::Converted, // ASTC_2D_8X8 - SurfaceCompression::Converted, // ASTC_2D_8X5 - SurfaceCompression::Converted, // ASTC_2D_5X4 - SurfaceCompression::None, // BGRA8_SRGB - SurfaceCompression::Compressed, // DXT1_SRGB - SurfaceCompression::Compressed, // DXT23_SRGB - SurfaceCompression::Compressed, // DXT45_SRGB - SurfaceCompression::Compressed, // BC7U_SRGB - SurfaceCompression::None, // R4G4B4A4U - SurfaceCompression::Converted, // ASTC_2D_4X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X8_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X4_SRGB - SurfaceCompression::Converted, // ASTC_2D_5X5 - SurfaceCompression::Converted, // ASTC_2D_5X5_SRGB - SurfaceCompression::Converted, // ASTC_2D_10X8 - SurfaceCompression::Converted, // ASTC_2D_10X8_SRGB - SurfaceCompression::Converted, // ASTC_2D_6X6 - SurfaceCompression::Converted, // ASTC_2D_6X6_SRGB - SurfaceCompression::Converted, // ASTC_2D_10X10 - SurfaceCompression::Converted, // ASTC_2D_10X10_SRGB - SurfaceCompression::Converted, // ASTC_2D_12X12 - SurfaceCompression::Converted, // ASTC_2D_12X12_SRGB - SurfaceCompression::Converted, // ASTC_2D_8X6 - SurfaceCompression::Converted, // ASTC_2D_8X6_SRGB - SurfaceCompression::Converted, // ASTC_2D_6X5 - SurfaceCompression::Converted, // ASTC_2D_6X5_SRGB - SurfaceCompression::None, // E5B9G9R9F - SurfaceCompression::None, // Z32F - SurfaceCompression::None, // Z16 - SurfaceCompression::None, // Z24S8 - SurfaceCompression::Rearranged, // S8Z24 - SurfaceCompression::None, // Z32FS8 -}}; - -constexpr SurfaceCompression GetFormatCompressionType(PixelFormat format) { - if (format == PixelFormat::Invalid) { - return SurfaceCompression::None; - } - DEBUG_ASSERT(static_cast<std::size_t>(format) < compression_type_table.size()); - return compression_type_table[static_cast<std::size_t>(format)]; -} - SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type); bool SurfaceTargetIsLayered(SurfaceTarget target); diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 002df414f..7af0e792c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -18,15 +18,20 @@ MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, using Tegra::Texture::ConvertFromGuestToHost; using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::SurfaceCompression; +using VideoCore::Surface::IsPixelFormatASTC; +using VideoCore::Surface::PixelFormat; StagingCache::StagingCache() = default; StagingCache::~StagingCache() = default; -SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) - : params{params}, host_memory_size{params.GetHostSizeInBytes()}, gpu_addr{gpu_addr}, - mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels) { +SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported) + : params{params}, gpu_addr{gpu_addr}, mipmap_sizes(params.num_levels), + mipmap_offsets(params.num_levels) { + is_converted = IsPixelFormatASTC(params.pixel_format) && !is_astc_supported; + host_memory_size = params.GetHostSizeInBytes(is_converted); + std::size_t offset = 0; for (u32 level = 0; level < params.num_levels; ++level) { const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; @@ -164,7 +169,7 @@ void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const Surf std::size_t guest_offset{mipmap_offsets[level]}; if (params.is_layered) { - std::size_t host_offset{0}; + std::size_t host_offset = 0; const std::size_t guest_stride = layer_size; const std::size_t host_stride = params.GetHostLayerSize(level); for (u32 layer = 0; layer < params.depth; ++layer) { @@ -185,28 +190,17 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, MICROPROFILE_SCOPE(GPU_Load_Texture); auto& staging_buffer = staging_cache.GetBuffer(0); u8* host_ptr; - is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); - - // Handle continuouty - if (is_continuous) { - // Use physical memory directly - host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return; - } - } else { - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", params.block_width, static_cast<u32>(params.target)); for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params, staging_buffer.data() + host_offset, level); } @@ -219,7 +213,7 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, const u32 height{(params.height + block_height - 1) / block_height}; const u32 copy_size{width * bpp}; if (params.pitch == copy_size) { - std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes()); + std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes(false)); } else { const u8* start{host_ptr}; u8* write_to{staging_buffer.data()}; @@ -231,19 +225,15 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, } } - auto compression_type = params.GetCompressionType(); - if (compression_type == SurfaceCompression::None || - compression_type == SurfaceCompression::Compressed) + if (!is_converted && params.pixel_format != PixelFormat::S8Z24) { return; + } - for (u32 level_up = params.num_levels; level_up > 0; --level_up) { - const u32 level = level_up - 1; - const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)}; - const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged - ? in_host_offset - : params.GetConvertedMipmapOffset(level); - u8* in_buffer = staging_buffer.data() + in_host_offset; - u8* out_buffer = staging_buffer.data() + out_host_offset; + for (u32 level = params.num_levels; level--;) { + const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level, false)}; + const std::size_t out_host_offset{params.GetHostMipmapLevelOffset(level, is_converted)}; + u8* const in_buffer = staging_buffer.data() + in_host_offset; + u8* const out_buffer = staging_buffer.data() + out_host_offset; ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format, params.GetMipWidth(level), params.GetMipHeight(level), params.GetMipDepth(level), true, true); @@ -256,24 +246,15 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, auto& staging_buffer = staging_cache.GetBuffer(0); u8* host_ptr; - // Handle continuouty - if (is_continuous) { - // Use physical memory directly - host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return; - } - } else { - // Use an extra temporal buffer - auto& tmp_buffer = staging_cache.GetBuffer(1); - tmp_buffer.resize(guest_memory_size); - host_ptr = tmp_buffer.data(); - } + // Use an extra temporal buffer + auto& tmp_buffer = staging_cache.GetBuffer(1); + tmp_buffer.resize(guest_memory_size); + host_ptr = tmp_buffer.data(); if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); for (u32 level = 0; level < params.num_levels; ++level) { - const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)}; + const std::size_t host_offset{params.GetHostMipmapLevelOffset(level, false)}; SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params, staging_buffer.data() + host_offset, level); } @@ -299,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, } } } - if (!is_continuous) { - memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); - } + memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 5f79bb0aa..a39a8661b 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -68,8 +68,8 @@ public: return gpu_addr; } - bool Overlaps(const CacheAddr start, const CacheAddr end) const { - return (cache_addr < end) && (cache_addr_end > start); + bool Overlaps(const VAddr start, const VAddr end) const { + return (cpu_addr < end) && (cpu_addr_end > start); } bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { @@ -86,21 +86,13 @@ public: return cpu_addr; } - void SetCpuAddr(const VAddr new_addr) { - cpu_addr = new_addr; - } - - CacheAddr GetCacheAddr() const { - return cache_addr; + VAddr GetCpuAddrEnd() const { + return cpu_addr_end; } - CacheAddr GetCacheAddrEnd() const { - return cache_addr_end; - } - - void SetCacheAddr(const CacheAddr new_addr) { - cache_addr = new_addr; - cache_addr_end = new_addr + guest_memory_size; + void SetCpuAddr(const VAddr new_addr) { + cpu_addr = new_addr; + cpu_addr_end = new_addr + guest_memory_size; } const SurfaceParams& GetSurfaceParams() const { @@ -119,18 +111,14 @@ public: return mipmap_sizes[level]; } - void MarkAsContinuous(const bool is_continuous) { - this->is_continuous = is_continuous; - } - - bool IsContinuous() const { - return is_continuous; - } - bool IsLinear() const { return !params.is_tiled; } + bool IsConverted() const { + return is_converted; + } + bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { return params.pixel_format == pixel_format; } @@ -160,7 +148,8 @@ public: } protected: - explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); + explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported); ~SurfaceBaseImpl() = default; virtual void DecorateSurfaceName() = 0; @@ -168,12 +157,11 @@ protected: const SurfaceParams params; std::size_t layer_size; std::size_t guest_memory_size; - const std::size_t host_memory_size; + std::size_t host_memory_size; GPUVAddr gpu_addr{}; - CacheAddr cache_addr{}; - CacheAddr cache_addr_end{}; VAddr cpu_addr{}; - bool is_continuous{}; + VAddr cpu_addr_end{}; + bool is_converted{}; std::vector<std::size_t> mipmap_sizes; std::vector<std::size_t> mipmap_offsets; @@ -288,8 +276,9 @@ public: } protected: - explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBaseImpl(gpu_addr, params) {} + explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params, + bool is_astc_supported) + : SurfaceBaseImpl(gpu_addr, params, is_astc_supported) {} ~SurfaceBase() = default; diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 9931c5ef7..6f3ef45be 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -113,10 +113,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta params.height = tic.Height(); params.depth = tic.Depth(); params.pitch = params.is_tiled ? 0 : tic.Pitch(); - if (params.target == SurfaceTarget::Texture2D && params.depth > 1) { - params.depth = 1; - } else if (params.target == SurfaceTarget::TextureCubemap || - params.target == SurfaceTarget::TextureCubeArray) { + if (params.target == SurfaceTarget::TextureCubemap || + params.target == SurfaceTarget::TextureCubeArray) { params.depth *= 6; } params.num_levels = tic.max_mip_level + 1; @@ -309,28 +307,26 @@ std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const { return offset; } -std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const { - std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); - } - return offset; -} - -std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const { +std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level, bool is_converted) const { std::size_t offset = 0; - for (u32 i = 0; i < level; i++) { - offset += GetConvertedMipmapSize(i); + if (is_converted) { + for (u32 i = 0; i < level; ++i) { + offset += GetConvertedMipmapSize(i) * GetNumLayers(); + } + } else { + for (u32 i = 0; i < level; ++i) { + offset += GetInnerMipmapMemorySize(i, true, false) * GetNumLayers(); + } } return offset; } std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const { constexpr std::size_t rgba8_bpp = 4ULL; - const std::size_t width_t = GetMipWidth(level); - const std::size_t height_t = GetMipHeight(level); - const std::size_t depth_t = is_layered ? depth : GetMipDepth(level); - return width_t * height_t * depth_t * rgba8_bpp; + const std::size_t mip_width = GetMipWidth(level); + const std::size_t mip_height = GetMipHeight(level); + const std::size_t mip_depth = is_layered ? 1 : GetMipDepth(level); + return mip_width * mip_height * mip_depth * rgba8_bpp; } std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const { diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 995cc3818..24957df8d 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -20,8 +20,6 @@ namespace VideoCommon { class FormatLookupTable; -using VideoCore::Surface::SurfaceCompression; - class SurfaceParams { public: /// Creates SurfaceCachedParams from a texture configuration. @@ -67,16 +65,14 @@ public: return GetInnerMemorySize(false, false, false); } - std::size_t GetHostSizeInBytes() const { - std::size_t host_size_in_bytes; - if (GetCompressionType() == SurfaceCompression::Converted) { - // ASTC is uncompressed in software, in emulated as RGBA8 - host_size_in_bytes = 0; - for (u32 level = 0; level < num_levels; ++level) { - host_size_in_bytes += GetConvertedMipmapSize(level); - } - } else { - host_size_in_bytes = GetInnerMemorySize(true, false, false); + std::size_t GetHostSizeInBytes(bool is_converted) const { + if (!is_converted) { + return GetInnerMemorySize(true, false, false); + } + // ASTC is uncompressed in software, in emulated as RGBA8 + std::size_t host_size_in_bytes = 0; + for (u32 level = 0; level < num_levels; ++level) { + host_size_in_bytes += GetConvertedMipmapSize(level) * GetNumLayers(); } return host_size_in_bytes; } @@ -107,9 +103,8 @@ public: u32 GetMipBlockDepth(u32 level) const; /// Returns the best possible row/pitch alignment for the surface. - u32 GetRowAlignment(u32 level) const { - const u32 bpp = - GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); + u32 GetRowAlignment(u32 level, bool is_converted) const { + const u32 bpp = is_converted ? 4 : GetBytesPerPixel(); return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); } @@ -117,11 +112,7 @@ public: std::size_t GetGuestMipmapLevelOffset(u32 level) const; /// Returns the offset in bytes in host memory (linear) of a given mipmap level. - std::size_t GetHostMipmapLevelOffset(u32 level) const; - - /// Returns the offset in bytes in host memory (linear) of a given mipmap level - /// for a texture that is converted in host gpu. - std::size_t GetConvertedMipmapOffset(u32 level) const; + std::size_t GetHostMipmapLevelOffset(u32 level, bool is_converted) const; /// Returns the size in bytes in guest memory of a given mipmap level. std::size_t GetGuestMipmapSize(u32 level) const { @@ -196,11 +187,6 @@ public: pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; } - /// Returns how the compression should be handled for this texture. - SurfaceCompression GetCompressionType() const { - return VideoCore::Surface::GetFormatCompressionType(pixel_format); - } - /// Returns is the surface is a TextureBuffer type of surface. bool IsBuffer() const { return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6cdbe63d0..88fe3e25f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; template <typename TSurface, typename TView> class TextureCache { - using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; - using IntervalType = typename IntervalMap::interval_type; public: - void InvalidateRegion(CacheAddr addr, std::size_t size) { + void InvalidateRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; for (const auto& surface : GetSurfacesInRegion(addr, size)) { @@ -76,7 +74,7 @@ public: guard_samplers = new_guard; } - void FlushRegion(CacheAddr addr, std::size_t size) { + void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; auto surfaces = GetSurfacesInRegion(addr, size); @@ -99,9 +97,9 @@ public: return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -110,7 +108,7 @@ public: } const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -124,13 +122,13 @@ public: if (!gpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; - const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); + const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); if (guard_samplers) { sampled_textures.push_back(surface); } @@ -159,14 +157,14 @@ public: SetEmptyDepthBuffer(); return {}; } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { SetEmptyDepthBuffer(); return {}; } const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; - auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); + auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); depth_buffer.target = surface_view.first; @@ -199,15 +197,15 @@ public: return {}; } - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!cache_addr) { + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { SetEmptyColorBuffer(index); return {}; } auto surface_view = - GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), + GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), preserve_contents, true); if (render_targets[index].target) render_targets[index].target->MarkAsRenderTarget(false, NO_RT); @@ -257,27 +255,26 @@ public: const GPUVAddr src_gpu_addr = src_config.Address(); const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; - const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; - const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; - const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; + const std::optional<VAddr> dst_cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); + const std::optional<VAddr> src_cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); std::pair<TSurface, TView> dst_surface = - GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); + GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); std::pair<TSurface, TView> src_surface = - GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); + GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); ImageBlit(src_surface.second, dst_surface.second, copy_config); dst_surface.first->MarkAsModified(true, Tick()); } - TSurface TryFindFramebufferSurface(const u8* host_ptr) { - const CacheAddr cache_addr = ToCacheAddr(host_ptr); - if (!cache_addr) { + TSurface TryFindFramebufferSurface(VAddr addr) { + if (!addr) { return nullptr; } - const CacheAddr page = cache_addr >> registry_page_bits; + const VAddr page = addr >> registry_page_bits; std::vector<TSurface>& list = registry[page]; for (auto& surface : list) { - if (surface->GetCacheAddr() == cache_addr) { + if (surface->GetCpuAddr() == addr) { return surface; } } @@ -289,8 +286,9 @@ public: } protected: - TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer} { + explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + bool is_astc_supported) + : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } @@ -337,18 +335,14 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); - const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); const std::size_t size = surface->GetSizeInBytes(); const std::optional<VAddr> cpu_addr = system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - if (!cache_ptr || !cpu_addr) { + if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); return; } - const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); - surface->MarkAsContinuous(continuous); - surface->SetCacheAddr(cache_ptr); surface->SetCpuAddr(*cpu_addr); RegisterInnerCache(surface); surface->MarkAsRegistered(true); @@ -381,6 +375,7 @@ protected: } Core::System& system; + const bool is_astc_supported; private: enum class RecycleStrategy : u32 { @@ -632,7 +627,7 @@ private: std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, const SurfaceParams& params, const GPUVAddr gpu_addr, - const CacheAddr cache_addr, + const VAddr cpu_addr, bool preserve_contents) { if (params.target == SurfaceTarget::Texture3D) { bool failed = false; @@ -657,7 +652,7 @@ private: failed = true; break; } - const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); + const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); modified |= surface->IsModified(); const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, @@ -677,7 +672,7 @@ private: } else { for (const auto& surface : overlaps) { if (!surface->MatchTarget(params.target)) { - if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { + if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { if (Settings::values.use_accurate_gpu_emulation) { return std::nullopt; } @@ -686,7 +681,7 @@ private: } return std::nullopt; } - if (surface->GetCacheAddr() != cache_addr) { + if (surface->GetCpuAddr() != cpu_addr) { continue; } if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { @@ -720,13 +715,13 @@ private: * left blank. * @param is_render Whether or not the surface is a render target. **/ - std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, + std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, const SurfaceParams& params, bool preserve_contents, bool is_render) { // Step 1 // Check Level 1 Cache for a fast structural match. If candidate surface // matches at certain level we are pretty much done. - if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -753,7 +748,7 @@ private: // Step 2 // Obtain all possible overlaps in the memory region const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; // If none are found, we are done. we just load the surface and create it. if (overlaps.empty()) { @@ -775,7 +770,7 @@ private: // Check if it's a 3D texture if (params.block_depth > 0) { auto surface = - Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); + Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); if (surface) { return *surface; } @@ -850,16 +845,16 @@ private: * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; + const std::optional<VAddr> cpu_addr = + system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - if (!cache_addr) { + if (!cpu_addr) { Deduction result{}; result.type = DeductionType::DeductionFailed; return result; } - if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { + if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { TSurface& current_surface = iter->second; const auto topological_result = current_surface->MatchesTopology(params); if (topological_result != MatchTopologyResult::FullMatch) { @@ -878,7 +873,7 @@ private: } const std::size_t candidate_size = params.GetGuestSizeInBytes(); - auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; + auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; if (overlaps.empty()) { Deduction result{}; @@ -1022,10 +1017,10 @@ private: } void RegisterInnerCache(TSurface& surface) { - const CacheAddr cache_addr = surface->GetCacheAddr(); - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; - l1_cache[cache_addr] = surface; + const VAddr cpu_addr = surface->GetCpuAddr(); + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; + l1_cache[cpu_addr] = surface; while (start <= end) { registry[start].push_back(surface); start++; @@ -1033,10 +1028,10 @@ private: } void UnregisterInnerCache(TSurface& surface) { - const CacheAddr cache_addr = surface->GetCacheAddr(); - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; - l1_cache.erase(cache_addr); + const VAddr cpu_addr = surface->GetCpuAddr(); + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; + l1_cache.erase(cpu_addr); while (start <= end) { auto& reg{registry[start]}; reg.erase(std::find(reg.begin(), reg.end(), surface)); @@ -1044,18 +1039,18 @@ private: } } - std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { + std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { if (size == 0) { return {}; } - const CacheAddr cache_addr_end = cache_addr + size; - CacheAddr start = cache_addr >> registry_page_bits; - const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; + const VAddr cpu_addr_end = cpu_addr + size; + VAddr start = cpu_addr >> registry_page_bits; + const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; std::vector<TSurface> surfaces; while (start <= end) { std::vector<TSurface>& list = registry[start]; for (auto& surface : list) { - if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { + if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { surface->MarkAsPicked(true); surfaces.push_back(surface); } @@ -1144,14 +1139,14 @@ private: // large in size. static constexpr u64 registry_page_bits{20}; static constexpr u64 registry_page_size{1 << registry_page_bits}; - std::unordered_map<CacheAddr, std::vector<TSurface>> registry; + std::unordered_map<VAddr, std::vector<TSurface>> registry; static constexpr u32 DEPTH_RT = 8; static constexpr u32 NO_RT = 0xFFFFFFFF; // The L1 Cache is used for fast texture lookup before checking the overlaps // This avoids calculating size and other stuffs. - std::unordered_map<CacheAddr, TSurface> l1_cache; + std::unordered_map<VAddr, TSurface> l1_cache; /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have /// previously been used. This is to prevent surfaces from being constantly created and diff --git a/src/video_core/textures/texture.cpp b/src/video_core/textures/texture.cpp new file mode 100644 index 000000000..d1939d744 --- /dev/null +++ b/src/video_core/textures/texture.cpp @@ -0,0 +1,80 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> + +#include "core/settings.h" +#include "video_core/textures/texture.h" + +namespace Tegra::Texture { + +namespace { + +constexpr std::array<float, 256> SRGB_CONVERSION_LUT = { + 0.000000f, 0.000000f, 0.000000f, 0.000012f, 0.000021f, 0.000033f, 0.000046f, 0.000062f, + 0.000081f, 0.000102f, 0.000125f, 0.000151f, 0.000181f, 0.000214f, 0.000251f, 0.000293f, + 0.000338f, 0.000388f, 0.000443f, 0.000503f, 0.000568f, 0.000639f, 0.000715f, 0.000798f, + 0.000887f, 0.000983f, 0.001085f, 0.001195f, 0.001312f, 0.001437f, 0.001569f, 0.001710f, + 0.001860f, 0.002019f, 0.002186f, 0.002364f, 0.002551f, 0.002748f, 0.002955f, 0.003174f, + 0.003403f, 0.003643f, 0.003896f, 0.004160f, 0.004436f, 0.004725f, 0.005028f, 0.005343f, + 0.005672f, 0.006015f, 0.006372f, 0.006744f, 0.007130f, 0.007533f, 0.007950f, 0.008384f, + 0.008834f, 0.009301f, 0.009785f, 0.010286f, 0.010805f, 0.011342f, 0.011898f, 0.012472f, + 0.013066f, 0.013680f, 0.014313f, 0.014967f, 0.015641f, 0.016337f, 0.017054f, 0.017793f, + 0.018554f, 0.019337f, 0.020144f, 0.020974f, 0.021828f, 0.022706f, 0.023609f, 0.024536f, + 0.025489f, 0.026468f, 0.027473f, 0.028504f, 0.029563f, 0.030649f, 0.031762f, 0.032904f, + 0.034074f, 0.035274f, 0.036503f, 0.037762f, 0.039050f, 0.040370f, 0.041721f, 0.043103f, + 0.044518f, 0.045964f, 0.047444f, 0.048956f, 0.050503f, 0.052083f, 0.053699f, 0.055349f, + 0.057034f, 0.058755f, 0.060513f, 0.062307f, 0.064139f, 0.066008f, 0.067915f, 0.069861f, + 0.071845f, 0.073869f, 0.075933f, 0.078037f, 0.080182f, 0.082369f, 0.084597f, 0.086867f, + 0.089180f, 0.091535f, 0.093935f, 0.096378f, 0.098866f, 0.101398f, 0.103977f, 0.106601f, + 0.109271f, 0.111988f, 0.114753f, 0.117565f, 0.120426f, 0.123335f, 0.126293f, 0.129301f, + 0.132360f, 0.135469f, 0.138629f, 0.141841f, 0.145105f, 0.148421f, 0.151791f, 0.155214f, + 0.158691f, 0.162224f, 0.165810f, 0.169453f, 0.173152f, 0.176907f, 0.180720f, 0.184589f, + 0.188517f, 0.192504f, 0.196549f, 0.200655f, 0.204820f, 0.209046f, 0.213334f, 0.217682f, + 0.222093f, 0.226567f, 0.231104f, 0.235704f, 0.240369f, 0.245099f, 0.249894f, 0.254754f, + 0.259681f, 0.264674f, 0.269736f, 0.274864f, 0.280062f, 0.285328f, 0.290664f, 0.296070f, + 0.301546f, 0.307094f, 0.312713f, 0.318404f, 0.324168f, 0.330006f, 0.335916f, 0.341902f, + 0.347962f, 0.354097f, 0.360309f, 0.366597f, 0.372961f, 0.379403f, 0.385924f, 0.392524f, + 0.399202f, 0.405960f, 0.412798f, 0.419718f, 0.426719f, 0.433802f, 0.440967f, 0.448216f, + 0.455548f, 0.462965f, 0.470465f, 0.478052f, 0.485725f, 0.493484f, 0.501329f, 0.509263f, + 0.517285f, 0.525396f, 0.533595f, 0.541885f, 0.550265f, 0.558736f, 0.567299f, 0.575954f, + 0.584702f, 0.593542f, 0.602477f, 0.611507f, 0.620632f, 0.629852f, 0.639168f, 0.648581f, + 0.658092f, 0.667700f, 0.677408f, 0.687214f, 0.697120f, 0.707127f, 0.717234f, 0.727443f, + 0.737753f, 0.748167f, 0.758685f, 0.769305f, 0.780031f, 0.790861f, 0.801798f, 0.812839f, + 0.823989f, 0.835246f, 0.846611f, 0.858085f, 0.869668f, 0.881360f, 0.893164f, 0.905078f, + 0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f, +}; + +unsigned SettingsMinimumAnisotropy() noexcept { + switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { + default: + case Anisotropy::Default: + return 1U; + case Anisotropy::Filter2x: + return 2U; + case Anisotropy::Filter4x: + return 4U; + case Anisotropy::Filter8x: + return 8U; + case Anisotropy::Filter16x: + return 16U; + } +} + +} // Anonymous namespace + +std::array<float, 4> TSCEntry::GetBorderColor() const noexcept { + if (!srgb_conversion) { + return border_color; + } + return {SRGB_CONVERSION_LUT[srgb_border_color_r], SRGB_CONVERSION_LUT[srgb_border_color_g], + SRGB_CONVERSION_LUT[srgb_border_color_b], border_color[3]}; +} + +float TSCEntry::GetMaxAnisotropy() const noexcept { + return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy())); +} + +} // namespace Tegra::Texture diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h index 7edc4abe1..eba05aced 100644 --- a/src/video_core/textures/texture.h +++ b/src/video_core/textures/texture.h @@ -8,7 +8,6 @@ #include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" -#include "core/settings.h" namespace Tegra::Texture { @@ -132,6 +131,20 @@ enum class SwizzleSource : u32 { OneFloat = 7, }; +enum class MsaaMode : u32 { + Msaa1x1 = 0, + Msaa2x1 = 1, + Msaa2x2 = 2, + Msaa4x2 = 3, + Msaa4x2_D3D = 4, + Msaa2x1_D3D = 5, + Msaa4x4 = 6, + Msaa2x2_VC4 = 8, + Msaa2x2_VC12 = 9, + Msaa4x2_VC8 = 10, + Msaa4x2_VC24 = 11, +}; + union TextureHandle { TextureHandle(u32 raw) : raw{raw} {} @@ -198,6 +211,7 @@ struct TICEntry { union { BitField<0, 4, u32> res_min_mip_level; BitField<4, 4, u32> res_max_mip_level; + BitField<8, 4, MsaaMode> msaa_mode; BitField<12, 12, u32> min_lod_clamp; }; @@ -336,24 +350,9 @@ struct TSCEntry { std::array<u8, 0x20> raw; }; - float GetMaxAnisotropy() const { - const u32 min_value = [] { - switch (static_cast<Anisotropy>(Settings::values.max_anisotropy)) { - default: - case Anisotropy::Default: - return 1U; - case Anisotropy::Filter2x: - return 2U; - case Anisotropy::Filter4x: - return 4U; - case Anisotropy::Filter8x: - return 8U; - case Anisotropy::Filter16x: - return 16U; - } - }(); - return static_cast<float>(std::max(1U << max_anisotropy, min_value)); - } + std::array<float, 4> GetBorderColor() const noexcept; + + float GetMaxAnisotropy() const noexcept; float GetMinLod() const { return static_cast<float>(min_lod_clamp) / 256.0f; @@ -368,15 +367,6 @@ struct TSCEntry { constexpr u32 mask = 1U << (13 - 1); return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f; } - - std::array<float, 4> GetBorderColor() const { - if (srgb_conversion) { - return {static_cast<float>(srgb_border_color_r) / 255.0f, - static_cast<float>(srgb_border_color_g) / 255.0f, - static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]}; - } - return border_color; - } }; static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size"); diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index a5f81a8a0..f60bdc60a 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -15,13 +15,13 @@ #endif #include "video_core/video_core.h" -namespace VideoCore { - -std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system) { +namespace { +std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, + Core::System& system, + Core::Frontend::GraphicsContext& context) { switch (Settings::values.renderer_backend) { case Settings::RendererBackend::OpenGL: - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system); + return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); #ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); @@ -30,13 +30,23 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind return nullptr; } } +} // Anonymous namespace -std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) { - if (Settings::values.use_asynchronous_gpu_emulation) { - return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer()); +namespace VideoCore { + +std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + auto context = emu_window.CreateSharedContext(); + const auto scope = context->Acquire(); + auto renderer = CreateRenderer(emu_window, system, *context); + if (!renderer->Init()) { + return nullptr; } - return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer()); + if (Settings::values.use_asynchronous_gpu_emulation) { + return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), + std::move(context)); + } + return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context)); } u16 GetResolutionScaleFactor(const RendererBase& renderer) { diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index b8e0ac372..f5c27125d 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -22,17 +22,8 @@ namespace VideoCore { class RendererBase; -/** - * Creates a renderer instance. - * - * @note The returned renderer instance is simply allocated. Its Init() - * function still needs to be called to fully complete its setup. - */ -std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system); - /// Creates an emulated GPU instance using the given system context. -std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system); +std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system); u16 GetResolutionScaleFactor(const RendererBase& renderer); |