summaryrefslogblamecommitdiffstats
path: root/src/video_core/texture_cache.h
blob: 0415516910c29769a46a361e2f56e1643d8faf42 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586









































































































































































































































































































































































































































































































































































































                                                                                                    
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <list>
#include <map>
#include <memory>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>

#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"

// Forward declaration so this header can name Core::System without defining it here.
namespace Core {
class System;
}

// Forward declaration of the texture descriptor type taken by SurfaceParams::CreateForTexture and
// TextureCache::GetTextureSurface below.
namespace Tegra::Texture {
struct FullTextureInfo;
}

// Forward declaration of the rasterizer interface that TextureCache keeps a reference to.
namespace VideoCore {
class RasterizerInterface;
}

namespace VideoCommon {

/// Base set of surface parameters that exposes Hash() and operator==.
///
/// SurfaceParams derives from this class and std::hash<SurfaceParams> forwards to Hash(), which is
/// what lets surface parameters be used as keys of unordered containers. Hash() and operator==
/// are only declared here; their definitions are not part of this header.
class HasheableSurfaceParams {
public:
    /// Returns a hash value for these parameters (defined out of line).
    std::size_t Hash() const;

    /// Compares these parameters against another set (defined out of line).
    bool operator==(const HasheableSurfaceParams& rhs) const;

protected:
    // Avoid creation outside of a managed environment: the constructor is protected, so only
    // derived classes can create instances.
    HasheableSurfaceParams() = default;

    // NOTE: none of the fields below has a default member initializer, so default-initialization
    // leaves them indeterminate until the creating code assigns them.
    bool is_tiled;
    u32 block_width;
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    u32 width;
    u32 height;
    u32 depth;
    u32 pitch;
    u32 unaligned_height;
    u32 num_levels;
    VideoCore::Surface::PixelFormat pixel_format;
    VideoCore::Surface::ComponentType component_type;
    VideoCore::Surface::SurfaceType type;
    VideoCore::Surface::SurfaceTarget target;
};

/// Full description of a surface: the hashable parameters inherited from HasheableSurfaceParams
/// plus cached values (guest size, host size and layer count) stored alongside them.
///
/// Instances are obtained through the static CreateFor* factories. All non-trivial member
/// functions are only declared here; their definitions live outside this header.
class SurfaceParams final : public HasheableSurfaceParams {
public:
    /// Creates SurfaceParams from a texture configuration.
    static SurfaceParams CreateForTexture(Core::System& system,
                                          const Tegra::Texture::FullTextureInfo& config);

    /// Creates SurfaceParams for a depth buffer configuration.
    static SurfaceParams CreateForDepthBuffer(
        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceParams from a framebuffer configuration.
    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);

    /// Creates SurfaceParams from a Fermi2D surface configuration.
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    // The accessors below return the stored field of the same name unchanged.

    bool IsTiled() const {
        return is_tiled;
    }

    u32 GetBlockWidth() const {
        return block_width;
    }

    u32 GetTileWidthSpacing() const {
        return tile_width_spacing;
    }

    u32 GetWidth() const {
        return width;
    }

    u32 GetHeight() const {
        return height;
    }

    u32 GetDepth() const {
        return depth;
    }

    u32 GetPitch() const {
        return pitch;
    }

    u32 GetNumLevels() const {
        return num_levels;
    }

    VideoCore::Surface::PixelFormat GetPixelFormat() const {
        return pixel_format;
    }

    VideoCore::Surface::ComponentType GetComponentType() const {
        return component_type;
    }

    VideoCore::Surface::SurfaceTarget GetTarget() const {
        return target;
    }

    VideoCore::Surface::SurfaceType GetType() const {
        return type;
    }

    /// Returns the cached size in bytes of the surface in guest memory.
    std::size_t GetGuestSizeInBytes() const {
        return guest_size_in_bytes;
    }

    /// Returns the cached size in bytes of the surface in host memory.
    std::size_t GetHostSizeInBytes() const {
        return host_size_in_bytes;
    }

    /// Returns the cached number of layers.
    u32 GetNumLayers() const {
        return num_layers;
    }

    /// Returns the width of a given mipmap level.
    u32 GetMipWidth(u32 level) const;

    /// Returns the height of a given mipmap level.
    u32 GetMipHeight(u32 level) const;

    /// Returns the depth of a given mipmap level.
    u32 GetMipDepth(u32 level) const;

    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;

    /// Returns the block height of a given mipmap level.
    u32 GetMipBlockHeight(u32 level) const;

    /// Returns the block depth of a given mipmap level.
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the offset in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const;

    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
    std::size_t GetHostLayerSize(u32 level) const;

    /// Returns true if another surface can be familiar with this. This is a loosely defined term
    /// that reflects the possibility of these two surface parameters potentially being part of a
    /// bigger superset.
    bool IsFamiliar(const SurfaceParams& view_params) const;

    /// Returns true if the pixel format is a depth and/or stencil format.
    bool IsPixelFormatZeta() const;

    /// Creates a map that redirects an address difference to a layer and mipmap level.
    /// The key is a byte offset from the start of the surface; the mapped pair is consumed by
    /// SurfaceBase::TryGetView as (layer, level).
    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;

    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;

private:
    /// Calculates values that can be deduced from HasheableSurfaceParams.
    void CalculateCachedValues();

    /// Returns the size of a given mipmap level.
    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
                                         bool uncompressed) const;

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;

    /// Returns true if the passed view width and height match the size of this params in a given
    /// mipmap level.
    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view layers and mipmap levels are in bounds.
    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;

    // Cached values stored next to the hashable parameters. They have no default initializer;
    // presumably CalculateCachedValues() fills them in (its definition is not visible here).
    std::size_t guest_size_in_bytes;
    std::size_t host_size_in_bytes;
    u32 num_layers;
};

/// Key identifying a view of a surface by a range of layers and a range of mipmap levels.
///
/// SurfaceBase uses it as the key of its view cache and builds it by aggregate initialization in
/// the order (base_layer, num_layers, base_level, num_levels), so the field order matters.
struct ViewKey {
    /// Returns a hash value for this key (defined out of line); used by std::hash<ViewKey>.
    std::size_t Hash() const;

    /// Compares two keys (defined out of line).
    bool operator==(const ViewKey& rhs) const;

    u32 base_layer{}; ///< First layer covered by the view.
    u32 num_layers{}; ///< Number of layers covered by the view.
    u32 base_level{}; ///< First mipmap level covered by the view.
    u32 num_levels{}; ///< Number of mipmap levels covered by the view.
};

} // namespace VideoCommon

namespace std {

template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<VideoCommon::ViewKey> {
    std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std

namespace VideoCommon {

template <typename TView, typename TExecutionContext>
class SurfaceBase {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);

public:
    virtual void LoadBuffer() = 0;

    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;

    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;

    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
        if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
            // It can't be a view if it's in a prior address.
            return {};
        }

        const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
        const auto it{view_offset_map.find(relative_offset)};
        if (it == view_offset_map.end()) {
            // Couldn't find an aligned view.
            return {};
        }
        const auto [layer, level] = it->second;

        if (!params.IsViewValid(view_params, layer, level)) {
            return {};
        }

        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
    }

    VAddr GetCpuAddr() const {
        ASSERT(is_registered);
        return cpu_addr;
    }

    u8* GetHostPtr() const {
        ASSERT(is_registered);
        return host_ptr;
    }

    CacheAddr GetCacheAddr() const {
        ASSERT(is_registered);
        return cache_addr;
    }

    std::size_t GetSizeInBytes() const {
        return params.GetGuestSizeInBytes();
    }

    void MarkAsModified(bool is_modified_) {
        is_modified = is_modified_;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
        TView* view{TryGetView(view_addr, view_params)};
        ASSERT(view != nullptr);
        return view;
    }

    void Register(VAddr cpu_addr_, u8* host_ptr_) {
        ASSERT(!is_registered);
        is_registered = true;
        cpu_addr = cpu_addr_;
        host_ptr = host_ptr_;
        cache_addr = ToCacheAddr(host_ptr_);
    }

    void Register(VAddr cpu_addr_) {
        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
    }

    void Unregister() {
        ASSERT(is_registered);
        is_registered = false;
    }

    bool IsRegistered() const {
        return is_registered;
    }

protected:
    explicit SurfaceBase(const SurfaceParams& params)
        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}

    ~SurfaceBase() = default;

    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;

    bool IsModified() const {
        return is_modified;
    }

    const SurfaceParams params;

private:
    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
        const ViewKey key{base_layer, num_layers, base_level, num_levels};
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view.get();
    }

    const std::map<u64, std::pair<u32, u32>> view_offset_map;

    VAddr cpu_addr{};
    u8* host_ptr{};
    CacheAddr cache_addr{};
    bool is_modified{};
    bool is_registered{};
    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
};

/// Generic texture cache shared by the rendering backends.
///
/// Keeps track of the surfaces registered over ranges of cache addresses and hands out views of
/// them for textures, depth buffers, color buffers and Fermi2D copies. Backends supply surface
/// creation and an optional fast path through the pure virtual hooks.
template <typename TSurface, typename TView, typename TExecutionContext>
class TextureCache {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
    using ResultType = std::tuple<TView*, TExecutionContext>;
    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
    using IntervalType = typename IntervalMap::interval_type;

public:
    /// Unregisters every surface that overlaps [addr, addr + size).
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
            if (!surface->IsRegistered()) {
                // Defensive: never unregister the same surface twice.
                continue;
            }
            Unregister(surface);
        }
    }

    /// Returns a view for the given texture configuration, or a null view when the GPU address
    /// cannot be translated to a CPU address. Contents are always preserved.
    ResultType GetTextureSurface(TExecutionContext exctx,
                                 const Tegra::Texture::FullTextureInfo& config) {
        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }
        const auto params{SurfaceParams::CreateForTexture(system, config)};
        return GetSurfaceView(exctx, *cpu_addr, params, true);
    }

    /// Returns a view for the current depth buffer, or a null view when the depth buffer has no
    /// address, is disabled, or its address cannot be translated.
    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
        const auto& regs{system.GPU().Maxwell3D().regs};
        if (!regs.zeta.Address() || !regs.zeta_enable) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
    }

    /// Returns a view for the color buffer bound at the given render target index, or a null view
    /// when the target is out of the active range, has no address, has no format, or its address
    /// cannot be translated.
    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
                                     bool preserve_contents) {
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

        const auto& regs{system.GPU().Maxwell3D().regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto& config{regs.rt[index]};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(
            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                              preserve_contents);
    }

    /// Returns a view for a Fermi2D copy surface. Asserts that the GPU address can be translated.
    /// Contents are always preserved.
    ResultType GetFermiSurface(TExecutionContext exctx,
                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
        ASSERT(cpu_addr);
        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
                              true);
    }

    /// Returns the first surface registered over the cache address of host_ptr, or nullptr when
    /// no registered surface covers it.
    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
    }

protected:
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {}

    ~TextureCache() = default;

    /// Backend hook called when the requested surface overlaps existing ones and no direct view
    /// could be taken. Returning a non-null view skips flushing the overlaps and loading a new
    /// surface; the overlaps are unregistered either way.
    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                                             const SurfaceParams& params, bool preserve_contents,
                                             const std::vector<TSurface*>& overlaps) = 0;

    /// Backend hook that creates a new surface for the given parameters.
    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;

    /// Registers the surface at the given addresses, adds it to the interval map and increments
    /// the rasterizer's cached page count for its range.
    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
        surface->Register(cpu_addr, host_ptr);
        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
    }

    /// Removes the surface from the interval map, decrements the rasterizer's cached page count
    /// for its range and marks it as unregistered. The surface must currently be registered.
    void Unregister(TSurface* surface) {
        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
        surface->Unregister();
    }

    /// Returns an unregistered surface with the given parameters, reusing one from the reserve
    /// when possible and creating (and reserving) a new one otherwise.
    TSurface* GetUncachedSurface(const SurfaceParams& params) {
        if (TSurface* surface = TryGetReservedSurface(params); surface) {
            return surface;
        }
        // No reserved surface available, create a new one and reserve it
        auto new_surface{CreateSurface(params)};
        TSurface* surface{new_surface.get()};
        ReserveSurface(params, std::move(new_surface));
        return surface;
    }

    Core::System& system;

private:
    /// Resolves a view for the given address and parameters.
    ///
    /// With no overlapping surface a new one is loaded. With exactly one overlap, a view of that
    /// surface is returned when it can provide one. Otherwise the backend fast path is tried, all
    /// overlaps are unregistered (and flushed first if the fast path failed) and, if still needed,
    /// a new surface is loaded.
    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
                              bool preserve_contents) {
        const auto host_ptr{Memory::GetPointer(cpu_addr)};
        const auto cache_addr{ToCacheAddr(host_ptr)};
        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
        if (overlaps.empty()) {
            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
        }

        if (overlaps.size() == 1) {
            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view) {
                return {view, exctx};
            }
        }

        TView* fast_view{};
        std::tie(fast_view, exctx) =
            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);

        // overlaps holds each surface once, so each one is flushed and unregistered exactly once.
        for (TSurface* surface : overlaps) {
            if (!fast_view) {
                // Flush even when we don't care about the contents, to preserve memory not written
                // by the new surface.
                exctx = surface->FlushBuffer(exctx);
            }
            Unregister(surface);
        }

        if (fast_view) {
            return {fast_view, exctx};
        }

        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
    }

    /// Obtains an unregistered surface, registers it at the given addresses, optionally loads its
    /// contents and returns a view of it.
    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                               const SurfaceParams& params, bool preserve_contents) {
        TSurface* new_surface{GetUncachedSurface(params)};
        Register(new_surface, cpu_addr, host_ptr);
        if (preserve_contents) {
            exctx = LoadSurface(exctx, new_surface);
        }
        return {new_surface->GetView(cpu_addr, params), exctx};
    }

    /// Loads the surface buffer, uploads the texture and clears the surface's modified flag.
    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
        surface->LoadBuffer();
        exctx = surface->UploadTexture(exctx);
        surface->MarkAsModified(false);
        return exctx;
    }

    /// Returns every registered surface overlapping [cache_addr, cache_addr + size).
    ///
    /// The interval map splits its domain into segments, each holding the set of surfaces that
    /// cover it. A surface can therefore show up in several segments, and a segment can hold
    /// several surfaces. Each surface is returned exactly once, in order of first appearance.
    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
        if (size == 0) {
            return {};
        }
        const auto interval{IntervalType::right_open(cache_addr, cache_addr + size)};

        std::vector<TSurface*> surfaces;
        std::set<TSurface*> seen;
        for (const auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
            for (TSurface* surface : pair.second) {
                // insert() reports whether the surface was new; skip ones already collected.
                if (seen.insert(surface).second) {
                    surfaces.push_back(surface);
                }
            }
        }
        return surfaces;
    }

    /// Stores the surface in the reserve under its parameters, taking ownership of it.
    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
        surface_reserve[params].push_back(std::move(surface));
    }

    /// Returns a reserved surface with the given parameters that is not currently registered, or
    /// nullptr when there is none.
    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
        auto search{surface_reserve.find(params)};
        if (search == surface_reserve.end()) {
            return {};
        }
        for (auto& surface : search->second) {
            if (!surface->IsRegistered()) {
                return surface.get();
            }
        }
        return {};
    }

    /// Returns the right-open interval of cache addresses covered by a registered surface.
    IntervalType GetSurfaceInterval(TSurface* surface) const {
        return IntervalType::right_open(surface->GetCacheAddr(),
                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
    }

    VideoCore::RasterizerInterface& rasterizer;

    IntervalMap registered_surfaces;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
};

} // namespace VideoCommon