From 139ea93512aeead8a4aee3910a3de86eb109a838 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 5 Nov 2021 15:52:31 +0100 Subject: VideoCore: implement channels on gpu caches. --- src/video_core/texture_cache/texture_cache.h | 209 ++++++++++++++++++--------- 1 file changed, 137 insertions(+), 72 deletions(-) (limited to 'src/video_core/texture_cache/texture_cache.h') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1dbe01bc0..2731aead0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,6 +7,7 @@ #include "common/alignment.h" #include "common/settings.h" +#include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/texture_cache/image_view_base.h" @@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) + : runtime{runtime_}, rasterizer{rasterizer_} { // Configure null sampler TSCEntry sampler_descriptor{}; sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); @@ -42,6 +39,13 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); sampler_descriptor.cubemap_anisotropy.Assign(1); + // Setup channels + current_channel_id = UNSET_CHANNEL; + state = nullptr; + maxwell3d = nullptr; + kepler_compute = nullptr; + gpu_memory = nullptr; + // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant void(slot_images.insert(NullImageParams{})); @@ -93,7 +97,7 @@ void TextureCache

::RunGarbageCollector() { const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); @@ -152,22 +156,23 @@ void TextureCache

::MarkModification(ImageId id) noexcept { template template void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(graphics_image_table, graphics_image_view_ids, views); + FillImageViews(state->graphics_image_table, state->graphics_image_view_ids, + views); } template void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(compute_image_table, compute_image_view_ids, views); + FillImageViews(state->compute_image_table, state->compute_image_view_ids, views); } template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { + if (index > state->graphics_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; + const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index); + SamplerId& id = state->graphics_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -176,12 +181,12 @@ typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { template typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { + if (index > state->compute_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; + const auto [descriptor, is_new] = state->compute_sampler_table.Read(index); + SamplerId& id = state->compute_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -191,34 +196,34 @@ typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { template void TextureCache

::SynchronizeGraphicsDescriptors() { using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d->regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit; + if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) { + state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { + state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } template void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + const bool linked_tsc = kepler_compute->launch_description.linked_tsc; + const u32 tic_limit = kepler_compute->regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); + if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) { + state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } template bool TextureCache

::RescaleRenderTargets(bool is_clear) { - auto& flags = maxwell3d.dirty.flags; + auto& flags = maxwell3d->dirty.flags; u32 scale_rating = 0; bool rescaled = false; std::array tmp_color_images{}; @@ -315,7 +320,7 @@ bool TextureCache

::RescaleRenderTargets(bool is_clear) { template void TextureCache

::UpdateRenderTargets(bool is_clear) { using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; + auto& flags = maxwell3d->dirty.flags; if (!flags[Dirty::RenderTargets]) { for (size_t index = 0; index < NUM_RT; ++index) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; @@ -342,7 +347,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + render_targets.draw_buffers[index] = static_cast(maxwell3d->regs.rt_control.Map(index)); } u32 up_scale = 1; u32 down_shift = 0; @@ -351,8 +356,8 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { down_shift = Settings::values.resolution_info.down_shift; } render_targets.size = Extent2D{ - (maxwell3d.regs.render_area.width * up_scale) >> down_shift, - (maxwell3d.regs.render_area.height * up_scale) >> down_shift, + (maxwell3d->regs.render_area.width * up_scale) >> down_shift, + (maxwell3d->regs.render_area.height * up_scale) >> down_shift, }; flags[Dirty::DepthBiasGlobal] = true; @@ -458,7 +463,7 @@ void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } } @@ -655,7 +660,7 @@ void TextureCache

::PopAsyncFlushes() { for (const ImageId image_id : download_ids) { const ImageBase& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span); download_map.offset += image.unswizzled_size_bytes; download_span = download_span.subspan(image.unswizzled_size_bytes); } @@ -714,26 +719,26 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info); runtime.AccelerateImageUpload(image, staging, uploads); } else if (True(image.flags & ImageFlagBits::Converted)) { std::vector unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); } } template ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { + if (!IsValidEntry(*gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } - const auto [pair, is_new] = image_views.try_emplace(config); + const auto [pair, is_new] = state->image_views.try_emplace(config); ImageViewId& image_view_id = pair->second; if (is_new) { image_view_id = CreateImageView(config); @@ -777,9 +782,9 @@ ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); if (!cpu_addr) { return ImageId{}; } @@ -860,7 +865,7 @@ void TextureCache

::InvalidateScale(Image& image) { image.scale_tick = frame_tick + 1; } const std::span image_view_ids = image.image_view_ids; - auto& dirty = maxwell3d.dirty.flags; + auto& dirty = maxwell3d->dirty.flags; dirty[Dirty::RenderTargets] = true; dirty[Dirty::ZetaBuffer] = true; for (size_t rt = 0; rt < NUM_RT; ++rt) { @@ -881,11 +886,11 @@ void TextureCache

::InvalidateScale(Image& image) { image.image_view_ids.clear(); image.image_view_infos.clear(); if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID); } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); + state->graphics_image_table.Invalidate(); + state->compute_image_table.Invalidate(); has_deleted_images = true; } @@ -929,10 +934,10 @@ bool TextureCache

::ScaleDown(Image& image) { template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); if (!cpu_addr) { const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; virtual_invalid_space += Common::AlignUp(size, 32); @@ -1050,7 +1055,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { new_image.flags |= ImageFlagBits::Sparse; } @@ -1192,7 +1197,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } - const auto [pair, is_new] = samplers.try_emplace(config); + const auto [pair, is_new] = state->samplers.try_emplace(config); if (is_new) { pair->second = slot_samplers.insert(runtime, config); } @@ -1201,7 +1206,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { template ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; if (index >= regs.rt_control.count) { return ImageViewId{}; } @@ -1219,7 +1224,7 @@ ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { template ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; if (!regs.zeta_enable) { return ImageViewId{}; } @@ -1321,8 +1326,8 @@ void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { + const auto it = state->gpu_page_table.find(page); + if (it == state->gpu_page_table.end()) { if constexpr (BOOL_BREAK) { return false; } else { @@ -1403,9 +1408,9 @@ template void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); for (const auto& [gpu_addr, size] : segments) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); if constexpr (RETURNS_BOOL) { if (func(gpu_addr, *cpu_addr, size)) { @@ -1449,7 +1454,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.lru_index = lru_cache.Insert(image_id, frame_tick); ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { auto map_id = slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); @@ -1497,8 +1502,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { } image_ids.erase(vector_it); }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, state->gpu_page_table); + }); if (False(image.flags & ImageFlagBits::Sparse)) { const auto map_id = image.map_view_id; ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { @@ -1631,7 +1637,7 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; + auto& dirty = maxwell3d->dirty.flags; dirty[Dirty::RenderTargets] = true; dirty[Dirty::ZetaBuffer] = true; for (size_t rt = 0; rt < NUM_RT; ++rt) { @@ -1681,22 +1687,24 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { if (alloc_images.empty()) { image_allocs_table.erase(alloc_it); } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + for (auto& this_state : channel_storage) { + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID); + } + this_state.graphics_image_table.Invalidate(); + this_state.compute_image_table.Invalidate(); } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); has_deleted_images = true; } template void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { + auto it = state->image_views.begin(); + while (it != state->image_views.end()) { const auto found = std::ranges::find(removed_views, it->second); if (found != removed_views.end()) { - it = image_views.erase(it); + it = state->image_views.erase(it); } else { ++it; } @@ -1943,7 +1951,7 @@ bool TextureCache

::IsFullClear(ImageViewId id) { const ImageViewBase& image_view = slot_image_views[id]; const ImageBase& image = slot_images[image_view.image_id]; const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; const auto& scissor = regs.scissor_test[0]; if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { // Images with multiple resources can't be cleared in a single call @@ -1958,4 +1966,61 @@ bool TextureCache

::IsFullClear(ImageViewId id) { scissor.max_y >= size.height; } +template +TextureCache

::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept + : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute}, + gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory}, + graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{ + gpu_memory} {} + +template +void TextureCache

::CreateChannel(struct Tegra::Control::ChannelState& channel) { + ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); + auto new_id = [this, &channel]() { + if (!free_channel_ids.empty()) { + auto id = free_channel_ids.front(); + free_channel_ids.pop_front(); + new (&channel_storage[id]) ChannelInfo(channel); + return id; + } + channel_storage.emplace_back(channel); + return channel_storage.size() - 1; + }(); + channel_map.emplace(channel.bind_id, new_id); + if (current_channel_id != UNSET_CHANNEL) { + state = &channel_storage[current_channel_id]; + } +} + +/// Bind a channel for execution. +template +void TextureCache

::BindToChannel(s32 id) { + auto it = channel_map.find(id); + ASSERT(it != channel_map.end() && id >= 0); + current_channel_id = it->second; + state = &channel_storage[current_channel_id]; + maxwell3d = &state->maxwell3d; + kepler_compute = &state->kepler_compute; + gpu_memory = &state->gpu_memory; +} + +/// Erase channel's state. +template +void TextureCache

::EraseChannel(s32 id) { + const auto it = channel_map.find(id); + ASSERT(it != channel_map.end() && id >= 0); + const auto this_id = it->second; + free_channel_ids.push_back(this_id); + channel_map.erase(it); + if (this_id == current_channel_id) { + current_channel_id = UNSET_CHANNEL; + state = nullptr; + maxwell3d = nullptr; + kepler_compute = nullptr; + gpu_memory = nullptr; + } else if (current_channel_id != UNSET_CHANNEL) { + state = &channel_storage[current_channel_id]; + } +} + } // namespace VideoCommon -- cgit v1.2.3