summary refs log tree commit diff stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/engines/maxwell_3d.h 9
-rw-r--r-- src/video_core/gpu.cpp 65
-rw-r--r-- src/video_core/gpu.h 7
-rw-r--r-- src/video_core/morton.cpp 2
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 36
-rw-r--r-- src/video_core/renderer_vulkan/maxwell_to_vk.cpp 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.cpp 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_device.h 7
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp 17
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.h 10
-rw-r--r-- src/video_core/renderer_vulkan/vk_sampler_cache.cpp 11
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 52
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h 4
-rw-r--r-- src/video_core/shader/decode/texture.cpp 2
-rw-r--r-- src/video_core/surface.cpp 2
-rw-r--r-- src/video_core/surface.h 72
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp 3
-rw-r--r-- src/video_core/texture_cache/surface_base.cpp 4
-rw-r--r-- src/video_core/texture_cache/surface_params.cpp 47
-rw-r--r-- src/video_core/texture_cache/surface_params.h 5
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 24
21 files changed, 213 insertions, 177 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 26939be3f..6ea7cc6a5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -542,7 +542,7 @@ public:
BitField<12, 1, InvMemoryLayout> type;
} memory_layout;
union {
- BitField<0, 16, u32> array_mode;
+ BitField<0, 16, u32> layers;
BitField<16, 1, u32> volume;
};
u32 layer_stride;
@@ -800,8 +800,12 @@ public:
u32 zeta_width;
u32 zeta_height;
+ union {
+ BitField<0, 16, u32> zeta_layers;
+ BitField<16, 1, u32> zeta_volume;
+ };
- INSERT_UNION_PADDING_WORDS(0x27);
+ INSERT_UNION_PADDING_WORDS(0x26);
u32 depth_test_enable;
@@ -1507,6 +1511,7 @@ ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(zeta_width, 0x48a);
ASSERT_REG_POSITION(zeta_height, 0x48b);
+ASSERT_REG_POSITION(zeta_layers, 0x48c);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7d7137109..e8f763ce9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -140,71 +140,6 @@ void GPU::FlushCommands() {
renderer.Rasterizer().FlushCommands();
}
-u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
- ASSERT(format != RenderTargetFormat::NONE);
-
- switch (format) {
- case RenderTargetFormat::RGBA32_FLOAT:
- case RenderTargetFormat::RGBA32_UINT:
- return 16;
- case RenderTargetFormat::RGBA16_UINT:
- case RenderTargetFormat::RGBA16_UNORM:
- case RenderTargetFormat::RGBA16_FLOAT:
- case RenderTargetFormat::RGBX16_FLOAT:
- case RenderTargetFormat::RG32_FLOAT:
- case RenderTargetFormat::RG32_UINT:
- return 8;
- case RenderTargetFormat::RGBA8_UNORM:
- case RenderTargetFormat::RGBA8_SNORM:
- case RenderTargetFormat::RGBA8_SRGB:
- case RenderTargetFormat::RGBA8_UINT:
- case RenderTargetFormat::RGB10_A2_UNORM:
- case RenderTargetFormat::BGRA8_UNORM:
- case RenderTargetFormat::BGRA8_SRGB:
- case RenderTargetFormat::RG16_UNORM:
- case RenderTargetFormat::RG16_SNORM:
- case RenderTargetFormat::RG16_UINT:
- case RenderTargetFormat::RG16_SINT:
- case RenderTargetFormat::RG16_FLOAT:
- case RenderTargetFormat::R32_FLOAT:
- case RenderTargetFormat::R11G11B10_FLOAT:
- case RenderTargetFormat::R32_UINT:
- return 4;
- case RenderTargetFormat::R16_UNORM:
- case RenderTargetFormat::R16_SNORM:
- case RenderTargetFormat::R16_UINT:
- case RenderTargetFormat::R16_SINT:
- case RenderTargetFormat::R16_FLOAT:
- case RenderTargetFormat::RG8_UNORM:
- case RenderTargetFormat::RG8_SNORM:
- return 2;
- case RenderTargetFormat::R8_UNORM:
- case RenderTargetFormat::R8_UINT:
- return 1;
- default:
- UNIMPLEMENTED_MSG("Unimplemented render target format {}", static_cast<u32>(format));
- return 1;
- }
-}
-
-u32 DepthFormatBytesPerPixel(DepthFormat format) {
- switch (format) {
- case DepthFormat::Z32_S8_X24_FLOAT:
- return 8;
- case DepthFormat::Z32_FLOAT:
- case DepthFormat::S8_Z24_UNORM:
- case DepthFormat::Z24_X8_UNORM:
- case DepthFormat::Z24_S8_UNORM:
- case DepthFormat::Z24_C8_UNORM:
- return 4;
- case DepthFormat::Z16_UNORM:
- return 2;
- default:
- UNIMPLEMENTED_MSG("Unimplemented Depth format {}", static_cast<u32>(format));
- return 1;
- }
-}
-
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
// So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 07727210c..ba8c9d665 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -57,6 +57,7 @@ enum class RenderTargetFormat : u32 {
RG16_UINT = 0xDD,
RG16_FLOAT = 0xDE,
R11G11B10_FLOAT = 0xE0,
+ R32_SINT = 0xE3,
R32_UINT = 0xE4,
R32_FLOAT = 0xE5,
B5G6R5_UNORM = 0xE8,
@@ -82,12 +83,6 @@ enum class DepthFormat : u32 {
Z32_S8_X24_FLOAT = 0x19,
};
-/// Returns the number of bytes per pixel of each rendertarget format.
-u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
-
-/// Returns the number of bytes per pixel of each depth format.
-u32 DepthFormatBytesPerPixel(DepthFormat format);
-
struct CommandListHeader;
class DebugContext;
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 2f2fe6859..f2c83266e 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -85,6 +85,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
MortonCopy<true, PixelFormat::RG32UI>,
MortonCopy<true, PixelFormat::RGBX16F>,
MortonCopy<true, PixelFormat::R32UI>,
+ MortonCopy<true, PixelFormat::R32I>,
MortonCopy<true, PixelFormat::ASTC_2D_8X8>,
MortonCopy<true, PixelFormat::ASTC_2D_8X5>,
MortonCopy<true, PixelFormat::ASTC_2D_5X4>,
@@ -166,6 +167,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
MortonCopy<false, PixelFormat::RG32UI>,
MortonCopy<false, PixelFormat::RGBX16F>,
MortonCopy<false, PixelFormat::R32UI>,
+ MortonCopy<false, PixelFormat::R32I>,
nullptr,
nullptr,
nullptr,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index d4b81cd87..cf934b0d8 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -87,6 +87,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
+ {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
@@ -260,6 +261,13 @@ CachedSurface::~CachedSurface() = default;
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
MICROPROFILE_SCOPE(OpenGL_Texture_Download);
+ if (params.IsBuffer()) {
+ glGetNamedBufferSubData(texture_buffer.handle, 0,
+ static_cast<GLsizeiptr>(params.GetHostSizeInBytes()),
+ staging_buffer.data());
+ return;
+ }
+
SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });
for (u32 level = 0; level < params.emulated_levels; ++level) {
@@ -398,24 +406,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
- ASSERT(params.num_layers == 1 && params.num_levels == 1);
+ ASSERT(params.num_levels == 1);
- const auto& owner_params = surface.GetSurfaceParams();
+ const GLuint texture = surface.GetTexture();
+ if (params.num_layers > 1) {
+ // Layered framebuffer attachments
+ UNIMPLEMENTED_IF(params.base_layer != 0);
+
+ switch (params.target) {
+ case SurfaceTarget::Texture2DArray:
+ glFramebufferTexture(target, attachment, texture, params.base_level);
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ return;
+ }
- switch (owner_params.target) {
+ const GLenum view_target = surface.GetTarget();
+ switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
+ glFramebufferTextureLayer(target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 331808113..ef66dd141 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -159,12 +159,13 @@ struct FormatTuple {
{vk::Format::eR32G32Uint, Attachable | Storage}, // RG32UI
{vk::Format::eUndefined, {}}, // RGBX16F
{vk::Format::eR32Uint, Attachable | Storage}, // R32UI
+ {vk::Format::eR32Sint, Attachable | Storage}, // R32I
{vk::Format::eAstc8x8UnormBlock, {}}, // ASTC_2D_8X8
{vk::Format::eUndefined, {}}, // ASTC_2D_8X5
{vk::Format::eUndefined, {}}, // ASTC_2D_5X4
{vk::Format::eUndefined, {}}, // BGRA8_SRGB
{vk::Format::eBc1RgbaSrgbBlock, {}}, // DXT1_SRGB
- {vk::Format::eUndefined, {}}, // DXT23_SRGB
+ {vk::Format::eBc2SrgbBlock, {}}, // DXT23_SRGB
{vk::Format::eBc3SrgbBlock, {}}, // DXT45_SRGB
{vk::Format::eBc7SrgbBlock, {}}, // BC7U_SRGB
{vk::Format::eR4G4B4A4UnormPack16, Attachable}, // R4G4B4A4U
@@ -363,6 +364,8 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr
return vk::Format::eR8G8B8A8Uint;
case Maxwell::VertexAttribute::Size::Size_32:
return vk::Format::eR32Uint;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return vk::Format::eR32G32B32A32Uint;
default:
break;
}
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 588a6835f..886bde3b9 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -107,6 +107,8 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
features.occlusionQueryPrecise = true;
features.fragmentStoresAndAtomics = true;
features.shaderImageGatherExtended = true;
+ features.shaderStorageImageReadWithoutFormat =
+ is_shader_storage_img_read_without_format_supported;
features.shaderStorageImageWriteWithoutFormat = true;
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
@@ -465,6 +467,8 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
const auto supported_features{physical.getFeatures(dldi)};
+ is_shader_storage_img_read_without_format_supported =
+ supported_features.shaderStorageImageReadWithoutFormat;
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
}
@@ -519,6 +523,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eB10G11R11UfloatPack32,
vk::Format::eR32Sfloat,
vk::Format::eR32Uint,
+ vk::Format::eR32Sint,
vk::Format::eR16Sfloat,
vk::Format::eR16G16B16A16Sfloat,
vk::Format::eB8G8R8A8Unorm,
@@ -538,6 +543,7 @@ std::unordered_map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperti
vk::Format::eBc6HUfloatBlock,
vk::Format::eBc6HSfloatBlock,
vk::Format::eBc1RgbaSrgbBlock,
+ vk::Format::eBc2SrgbBlock,
vk::Format::eBc3SrgbBlock,
vk::Format::eBc7SrgbBlock,
vk::Format::eAstc4x4SrgbBlock,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 72603f9f6..2c27ad730 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -122,6 +122,11 @@ public:
return properties.limits.maxPushConstantsSize;
}
+ /// Returns true if Shader storage Image Read Without Format supported.
+ bool IsShaderStorageImageReadWithoutFormatSupported() const {
+ return is_shader_storage_img_read_without_format_supported;
+ }
+
/// Returns true if ASTC is natively supported.
bool IsOptimalAstcSupported() const {
return is_optimal_astc_supported;
@@ -227,6 +232,8 @@ private:
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
+ bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
+ ///< image read without format
// Telemetry parameters
std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 31c078f6a..3bf86da87 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -611,33 +611,34 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen
std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers(
vk::RenderPass renderpass) {
FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(),
- std::numeric_limits<u32>::max()};
+ std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
- const auto MarkAsModifiedAndPush = [&](const View& view) {
- if (view == nullptr) {
+ const auto try_push = [&](const View& view) {
+ if (!view) {
return false;
}
key.views.push_back(view->GetHandle());
key.width = std::min(key.width, view->GetWidth());
key.height = std::min(key.height, view->GetHeight());
+ key.layers = std::min(key.layers, view->GetNumLayers());
return true;
};
for (std::size_t index = 0; index < std::size(color_attachments); ++index) {
- if (MarkAsModifiedAndPush(color_attachments[index])) {
+ if (try_push(color_attachments[index])) {
texture_cache.MarkColorBufferInUse(index);
}
}
- if (MarkAsModifiedAndPush(zeta_attachment)) {
+ if (try_push(zeta_attachment)) {
texture_cache.MarkDepthBufferInUse();
}
const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key);
auto& framebuffer = fbentry->second;
if (is_cache_miss) {
- const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass,
- static_cast<u32>(key.views.size()),
- key.views.data(), key.width, key.height, 1);
+ const vk::FramebufferCreateInfo framebuffer_ci(
+ {}, key.renderpass, static_cast<u32>(key.views.size()), key.views.data(), key.width,
+ key.height, key.layers);
const auto dev = device.GetLogical();
const auto& dld = device.GetDispatchLoader();
framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 138903d60..4dc8af6e8 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -56,6 +56,7 @@ struct FramebufferCacheKey {
vk::RenderPass renderpass{};
u32 width = 0;
u32 height = 0;
+ u32 layers = 0;
ImageViewsPack views;
std::size_t Hash() const noexcept {
@@ -66,12 +67,17 @@ struct FramebufferCacheKey {
}
boost::hash_combine(hash, width);
boost::hash_combine(hash, height);
+ boost::hash_combine(hash, layers);
return hash;
}
bool operator==(const FramebufferCacheKey& rhs) const noexcept {
- return std::tie(renderpass, views, width, height) ==
- std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height);
+ return std::tie(renderpass, views, width, height, layers) ==
+ std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
+ }
+
+ bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
+ return !operator==(rhs);
}
};
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index 0a8ec8398..204b7c39c 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -23,7 +23,14 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4>
} else if (color == std::array<float, 4>{1, 1, 1, 1}) {
return vk::BorderColor::eFloatOpaqueWhite;
} else {
- return {};
+ if (color[0] + color[1] + color[2] > 1.35f) {
+ // If color elements are brighter than roughly 0.5 average, use white border
+ return vk::BorderColor::eFloatOpaqueWhite;
+ }
+ if (color[3] > 0.5f) {
+ return vk::BorderColor::eFloatOpaqueBlack;
+ }
+ return vk::BorderColor::eFloatTransparentBlack;
}
}
@@ -37,8 +44,6 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
const auto border_color{tsc.GetBorderColor()};
const auto vk_border_color{TryConvertBorderColor(border_color)};
- UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
- border_color[0], border_color[1], border_color[2], border_color[3]);
constexpr bool unnormalized_coords{false};
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index f64f5da28..2da622d15 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -86,6 +86,7 @@ struct AttributeType {
struct VertexIndices {
std::optional<u32> position;
+ std::optional<u32> layer;
std::optional<u32> viewport;
std::optional<u32> point_size;
std::optional<u32> clip_distances;
@@ -284,14 +285,20 @@ public:
AddExtension("SPV_KHR_variable_pointers");
AddExtension("SPV_KHR_shader_draw_parameters");
- if (ir.UsesViewportIndex()) {
- AddCapability(spv::Capability::MultiViewport);
- if (device.IsExtShaderViewportIndexLayerSupported()) {
+ if (ir.UsesLayer() || ir.UsesViewportIndex()) {
+ if (ir.UsesViewportIndex()) {
+ AddCapability(spv::Capability::MultiViewport);
+ }
+ if (stage != ShaderType::Geometry && device.IsExtShaderViewportIndexLayerSupported()) {
AddExtension("SPV_EXT_shader_viewport_index_layer");
AddCapability(spv::Capability::ShaderViewportIndexLayerEXT);
}
}
+ if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ AddCapability(spv::Capability::StorageImageReadWithoutFormat);
+ }
+
if (device.IsFloat16Supported()) {
AddCapability(spv::Capability::Float16);
}
@@ -924,13 +931,22 @@ private:
VertexIndices indices;
indices.position = AddBuiltIn(t_float4, spv::BuiltIn::Position, "position");
+ if (ir.UsesLayer()) {
+ if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
+ indices.layer = AddBuiltIn(t_int, spv::BuiltIn::Layer, "layer");
+ } else {
+ LOG_ERROR(
+ Render_Vulkan,
+ "Shader requires Layer but it's not supported on this stage with this device.");
+ }
+ }
+
if (ir.UsesViewportIndex()) {
if (stage != ShaderType::Vertex || device.IsExtShaderViewportIndexLayerSupported()) {
indices.viewport = AddBuiltIn(t_int, spv::BuiltIn::ViewportIndex, "viewport_index");
} else {
- LOG_ERROR(Render_Vulkan,
- "Shader requires ViewportIndex but it's not supported on this "
- "stage with this device.");
+ LOG_ERROR(Render_Vulkan, "Shader requires ViewportIndex but it's not supported on "
+ "this stage with this device.");
}
}
@@ -1292,6 +1308,13 @@ private:
}
case Attribute::Index::LayerViewportPointSize:
switch (element) {
+ case 1: {
+ if (!out_indices.layer) {
+ return {};
+ }
+ const u32 index = out_indices.layer.value();
+ return {AccessElement(t_out_int, out_vertex, index), Type::Int};
+ }
case 2: {
if (!out_indices.viewport) {
return {};
@@ -1362,6 +1385,11 @@ private:
UNIMPLEMENTED();
}
+ if (!target.id) {
+ // On failure we return a nullptr target.id, skip these stores.
+ return {};
+ }
+
OpStore(target.id, As(Visit(src), target.type));
return {};
}
@@ -1755,8 +1783,16 @@ private:
}
Expression ImageLoad(Operation operation) {
- UNIMPLEMENTED();
- return {};
+ if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
+ return {v_float_zero, Type::Float};
+ }
+
+ const auto& meta{std::get<MetaImage>(operation.GetMeta())};
+
+ const Id coords = GetCoordinates(operation, Type::Int);
+ const Id texel = OpImageRead(t_uint4, GetImage(operation), coords);
+
+ return {OpCompositeExtract(t_uint, texel, meta.element), Type::Uint};
}
Expression ImageStore(Operation operation) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index d3edbe80c..22e3d34de 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -151,6 +151,10 @@ public:
return params.GetMipHeight(base_level);
}
+ u32 GetNumLayers() const {
+ return num_layers;
+ }
+
bool IsBufferView() const {
return buffer_view;
}
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 542636430..bee7d8cad 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -527,7 +527,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF(texture_type == TextureType::TextureCube && is_array && is_shadow);
- ASSERT_MSG(texture_type != TextureType::Texture3D || is_array || is_shadow,
+ ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
"Illegal texture type");
const SamplerInfo info{texture_type, is_array, is_shadow, false};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 1655ccf16..9707c353d 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -155,6 +155,8 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
return PixelFormat::R16I;
case Tegra::RenderTargetFormat::R32_FLOAT:
return PixelFormat::R32F;
+ case Tegra::RenderTargetFormat::R32_SINT:
+ return PixelFormat::R32I;
case Tegra::RenderTargetFormat::R32_UINT:
return PixelFormat::R32UI;
case Tegra::RenderTargetFormat::RG32_UINT:
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0d17a93ed..d88109e5a 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -59,47 +59,48 @@ enum class PixelFormat {
RG32UI = 41,
RGBX16F = 42,
R32UI = 43,
- ASTC_2D_8X8 = 44,
- ASTC_2D_8X5 = 45,
- ASTC_2D_5X4 = 46,
- BGRA8_SRGB = 47,
- DXT1_SRGB = 48,
- DXT23_SRGB = 49,
- DXT45_SRGB = 50,
- BC7U_SRGB = 51,
- R4G4B4A4U = 52,
- ASTC_2D_4X4_SRGB = 53,
- ASTC_2D_8X8_SRGB = 54,
- ASTC_2D_8X5_SRGB = 55,
- ASTC_2D_5X4_SRGB = 56,
- ASTC_2D_5X5 = 57,
- ASTC_2D_5X5_SRGB = 58,
- ASTC_2D_10X8 = 59,
- ASTC_2D_10X8_SRGB = 60,
- ASTC_2D_6X6 = 61,
- ASTC_2D_6X6_SRGB = 62,
- ASTC_2D_10X10 = 63,
- ASTC_2D_10X10_SRGB = 64,
- ASTC_2D_12X12 = 65,
- ASTC_2D_12X12_SRGB = 66,
- ASTC_2D_8X6 = 67,
- ASTC_2D_8X6_SRGB = 68,
- ASTC_2D_6X5 = 69,
- ASTC_2D_6X5_SRGB = 70,
- E5B9G9R9F = 71,
+ R32I = 44,
+ ASTC_2D_8X8 = 45,
+ ASTC_2D_8X5 = 46,
+ ASTC_2D_5X4 = 47,
+ BGRA8_SRGB = 48,
+ DXT1_SRGB = 49,
+ DXT23_SRGB = 50,
+ DXT45_SRGB = 51,
+ BC7U_SRGB = 52,
+ R4G4B4A4U = 53,
+ ASTC_2D_4X4_SRGB = 54,
+ ASTC_2D_8X8_SRGB = 55,
+ ASTC_2D_8X5_SRGB = 56,
+ ASTC_2D_5X4_SRGB = 57,
+ ASTC_2D_5X5 = 58,
+ ASTC_2D_5X5_SRGB = 59,
+ ASTC_2D_10X8 = 60,
+ ASTC_2D_10X8_SRGB = 61,
+ ASTC_2D_6X6 = 62,
+ ASTC_2D_6X6_SRGB = 63,
+ ASTC_2D_10X10 = 64,
+ ASTC_2D_10X10_SRGB = 65,
+ ASTC_2D_12X12 = 66,
+ ASTC_2D_12X12_SRGB = 67,
+ ASTC_2D_8X6 = 68,
+ ASTC_2D_8X6_SRGB = 69,
+ ASTC_2D_6X5 = 70,
+ ASTC_2D_6X5_SRGB = 71,
+ E5B9G9R9F = 72,
MaxColorFormat,
// Depth formats
- Z32F = 72,
- Z16 = 73,
+ Z32F = 73,
+ Z16 = 74,
MaxDepthFormat,
// DepthStencil formats
- Z24S8 = 74,
- S8Z24 = 75,
- Z32FS8 = 76,
+ Z24S8 = 75,
+ S8Z24 = 76,
+ Z32FS8 = 77,
MaxDepthStencilFormat,
@@ -171,6 +172,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
0, // RG32UI
0, // RGBX16F
0, // R32UI
+ 0, // R32I
2, // ASTC_2D_8X8
2, // ASTC_2D_8X5
2, // ASTC_2D_5X4
@@ -267,6 +269,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
1, // RG32UI
1, // RGBX16F
1, // R32UI
+ 1, // R32I
8, // ASTC_2D_8X8
8, // ASTC_2D_8X5
5, // ASTC_2D_5X4
@@ -355,6 +358,7 @@ constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
1, // RG32UI
1, // RGBX16F
1, // R32UI
+ 1, // R32I
8, // ASTC_2D_8X8
5, // ASTC_2D_8X5
4, // ASTC_2D_5X4
@@ -443,6 +447,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
64, // RG32UI
64, // RGBX16F
32, // R32UI
+ 32, // R32I
128, // ASTC_2D_8X8
128, // ASTC_2D_8X5
128, // ASTC_2D_5X4
@@ -546,6 +551,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
SurfaceCompression::None, // RG32UI
SurfaceCompression::None, // RGBX16F
SurfaceCompression::None, // R32UI
+ SurfaceCompression::None, // R32I
SurfaceCompression::Converted, // ASTC_2D_8X8
SurfaceCompression::Converted, // ASTC_2D_8X5
SurfaceCompression::Converted, // ASTC_2D_5X4
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index 81fb9f633..cc3ad8417 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -41,7 +41,7 @@ struct Table {
ComponentType alpha_component;
bool is_srgb;
};
-constexpr std::array<Table, 74> DefinitionTable = {{
+constexpr std::array<Table, 75> DefinitionTable = {{
{TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
{TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
{TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
@@ -89,6 +89,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{
{TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
{TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
+ {TextureFormat::R32, C, SINT, SINT, SINT, SINT, PixelFormat::R32I},
{TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 84469b7ba..002df414f 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -277,6 +277,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
staging_buffer.data() + host_offset, level);
}
+ } else if (params.IsBuffer()) {
+ // Buffers don't have pitch or any fancy layout property. We can just memcpy them to guest
+ // memory.
+ std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
} else {
ASSERT(params.target == SurfaceTarget::Texture2D);
ASSERT(params.num_levels == 1);
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 38b3a4ba8..f00839313 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -84,19 +84,16 @@ SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_ta
if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
switch (params.pixel_format) {
case PixelFormat::R16U:
- case PixelFormat::R16F: {
+ case PixelFormat::R16F:
params.pixel_format = PixelFormat::Z16;
break;
- }
- case PixelFormat::R32F: {
+ case PixelFormat::R32F:
params.pixel_format = PixelFormat::Z32F;
break;
- }
- default: {
+ default:
UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
static_cast<u32>(params.pixel_format));
}
- }
params.type = GetFormatType(params.pixel_format);
}
params.type = GetFormatType(params.pixel_format);
@@ -168,27 +165,29 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params;
}
-SurfaceParams SurfaceParams::CreateForDepthBuffer(
- Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
- u32 block_width, u32 block_height, u32 block_depth,
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type;
SurfaceParams params;
- params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
+ params.is_tiled = regs.zeta.memory_layout.type ==
+ Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
params.srgb_conversion = false;
- params.block_width = std::min(block_width, 5U);
- params.block_height = std::min(block_height, 5U);
- params.block_depth = std::min(block_depth, 5U);
+ params.block_width = std::min(regs.zeta.memory_layout.block_width.Value(), 5U);
+ params.block_height = std::min(regs.zeta.memory_layout.block_height.Value(), 5U);
+ params.block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
params.tile_width_spacing = 1;
- params.pixel_format = PixelFormatFromDepthFormat(format);
+ params.pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
params.type = GetFormatType(params.pixel_format);
- params.width = zeta_width;
- params.height = zeta_height;
- params.target = SurfaceTarget::Texture2D;
- params.depth = 1;
+ params.width = regs.zeta_width;
+ params.height = regs.zeta_height;
params.pitch = 0;
params.num_levels = 1;
params.emulated_levels = 1;
- params.is_layered = false;
+
+ const bool is_layered = regs.zeta_layers > 1 && params.block_depth == 0;
+ params.is_layered = is_layered;
+ params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
+ params.depth = is_layered ? regs.zeta_layers.Value() : 1U;
return params;
}
@@ -214,11 +213,13 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
params.width = params.pitch / bpp;
}
params.height = config.height;
- params.depth = 1;
- params.target = SurfaceTarget::Texture2D;
params.num_levels = 1;
params.emulated_levels = 1;
- params.is_layered = false;
+
+ const bool is_layered = config.layers > 1 && params.block_depth == 0;
+ params.is_layered = is_layered;
+ params.depth = is_layered ? config.layers.Value() : 1;
+ params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
return params;
}
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 9256fd6d9..995cc3818 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -35,10 +35,7 @@ public:
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(
- Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
- u32 block_width, u32 block_height, u32 block_depth,
- Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);
+ static SurfaceParams CreateForDepthBuffer(Core::System& system);
/// Creates SurfaceCachedParams from a framebuffer configuration.
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f4c015635..c70e4aec2 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -160,10 +160,7 @@ public:
SetEmptyDepthBuffer();
return {};
}
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(
- system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
- regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
- regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
+ const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -721,7 +718,6 @@ private:
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr,
const SurfaceParams& params, bool preserve_contents,
bool is_render) {
-
// Step 1
// Check Level 1 Cache for a fast structural match. If candidate surface
// matches at certain level we are pretty much done.
@@ -733,14 +729,18 @@ private:
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result);
}
+
const auto struct_result = current_surface->MatchesStructure(params);
- if (struct_result != MatchStructureResult::None &&
- (params.target != SurfaceTarget::Texture3D ||
- current_surface->MatchTarget(params.target))) {
- if (struct_result == MatchStructureResult::FullMatch) {
- return ManageStructuralMatch(current_surface, params, is_render);
- } else {
- return RebuildSurface(current_surface, params, is_render);
+ if (struct_result != MatchStructureResult::None) {
+ const auto& old_params = current_surface->GetSurfaceParams();
+ const bool not_3d = params.target != SurfaceTarget::Texture3D &&
+ old_params.target != SurfaceTarget::Texture3D;
+ if (not_3d || current_surface->MatchTarget(params.target)) {
+ if (struct_result == MatchStructureResult::FullMatch) {
+ return ManageStructuralMatch(current_surface, params, is_render);
+ } else {
+ return RebuildSurface(current_surface, params, is_render);
+ }
}
}
}