// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <bit>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
enum class Type : u64 {
_1D,
BUFFER_1D,
ARRAY_1D,
_2D,
ARRAY_2D,
_3D,
};
constexpr unsigned R = 1 << 0;
constexpr unsigned G = 1 << 1;
constexpr unsigned B = 1 << 2;
constexpr unsigned A = 1 << 3;
constexpr std::array MASK{
0U, //
R, //
G, //
R | G, //
B, //
R | B, //
G | B, //
R | G | B, //
A, //
R | A, //
G | A, //
R | G | A, //
B | A, //
R | B | A, //
G | B | A, //
R | G | B | A, //
};
enum class Size : u64 {
U8,
S8,
U16,
S16,
B32,
B64,
B128,
};
enum class Clamp : u64 {
IGN,
Default,
TRAP,
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
enum class LoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (L2 and below, not L1)
CI, // ???
CV, // Don't cache and fetch again (volatile)
};
enum class StoreCache : u64 {
WB, // Cache write-back all coherent levels
CG, // Cache at global level (L2 and below, not L1)
CS, // Cache streaming, likely to be accessed once
WT, // Cache write-through (to system memory, volatile?)
};
ImageFormat Format(Size size) {
switch (size) {
case Size::U8:
return ImageFormat::R8_UINT;
case Size::S8:
return ImageFormat::R8_SINT;
case Size::U16:
return ImageFormat::R16_UINT;
case Size::S16:
return ImageFormat::R16_SINT;
case Size::B32:
return ImageFormat::R32_UINT;
case Size::B64:
return ImageFormat::R32G32_UINT;
case Size::B128:
return ImageFormat::R32G32B32A32_UINT;
}
throw NotImplementedException("Invalid size {}", size);
}
int SizeInRegs(Size size) {
switch (size) {
case Size::U8:
case Size::S8:
case Size::U16:
case Size::S16:
case Size::B32:
return 1;
case Size::B64:
return 2;
case Size::B128:
return 4;
}
throw NotImplementedException("Invalid size {}", size);
}
TextureType GetType(Type type) {
switch (type) {
case Type::_1D:
return TextureType::Color1D;
case Type::BUFFER_1D:
return TextureType::Buffer;
case Type::ARRAY_1D:
return TextureType::ColorArray1D;
case Type::_2D:
return TextureType::Color2D;
case Type::ARRAY_2D:
return TextureType::ColorArray2D;
case Type::_3D:
return TextureType::Color3D;
}
throw NotImplementedException("Invalid type {}", type);
}
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
const auto array{[&](int index) {
return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
}};
switch (type) {
case Type::_1D:
case Type::BUFFER_1D:
return v.X(reg);
case Type::ARRAY_1D:
return v.ir.CompositeConstruct(v.X(reg), array(1));
case Type::_2D:
return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
case Type::ARRAY_2D:
return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
case Type::_3D:
return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
}
throw NotImplementedException("Invalid type {}", type);
}
unsigned SwizzleMask(u64 swizzle) {
if (swizzle == 0 || swizzle >= MASK.size()) {
throw NotImplementedException("Invalid swizzle {}", swizzle);
}
return MASK[swizzle];
}
IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
std::array<IR::U32, 4> colors;
for (int i = 0; i < num_regs; ++i) {
colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
}
for (int i = num_regs; i < 4; ++i) {
colors[static_cast<size_t>(i)] = ir.Imm32(0);
}
return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
}
} // Anonymous namespace
void TranslatorVisitor::SULD(u64 insn) {
union {
u64 raw;
BitField<51, 1, u64> is_bound;
BitField<52, 1, u64> d;
BitField<23, 1, u64> ba;
BitField<33, 3, Type> type;
BitField<24, 2, LoadCache> cache;
BitField<20, 3, Size> size; // .D
BitField<20, 4, u64> swizzle; // .P
BitField<49, 2, Clamp> clamp;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> coord_reg;
BitField<36, 13, u64> bound_offset; // is_bound
BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
} const suld{insn};
if (suld.clamp != Clamp::IGN) {
throw NotImplementedException("Clamp {}", suld.clamp.Value());
}
if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
throw NotImplementedException("Cache {}", suld.cache.Value());
}
const bool is_typed{suld.d != 0};
if (is_typed && suld.ba != 0) {
throw NotImplementedException("BA");
}
const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
const TextureType type{GetType(suld.type)};
const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
: X(suld.bindless_reg)};
IR::TextureInstInfo info{};
info.type.Assign(type);
info.image_format.Assign(format);
const IR::Value result{ir.ImageRead(handle, coords, info)};
IR::Reg dest_reg{suld.dest_reg};
if (is_typed) {
const int num_regs{SizeInRegs(suld.size)};
for (int i = 0; i < num_regs; ++i) {
X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
}
} else {
const unsigned mask{SwizzleMask(suld.swizzle)};
const int bits{std::popcount(mask)};
if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
throw NotImplementedException("Unaligned destination register");
}
for (unsigned component = 0; component < 4; ++component) {
if (((mask >> component) & 1) == 0) {
continue;
}
X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
++dest_reg;
}
}
}
void TranslatorVisitor::SUST(u64 insn) {
union {
u64 raw;
BitField<51, 1, u64> is_bound;
BitField<52, 1, u64> d;
BitField<23, 1, u64> ba;
BitField<33, 3, Type> type;
BitField<24, 2, StoreCache> cache;
BitField<20, 3, Size> size; // .D
BitField<20, 4, u64> swizzle; // .P
BitField<49, 2, Clamp> clamp;
BitField<0, 8, IR::Reg> data_reg;
BitField<8, 8, IR::Reg> coord_reg;
BitField<36, 13, u64> bound_offset; // is_bound
BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
} const sust{insn};
if (sust.clamp != Clamp::IGN) {
throw NotImplementedException("Clamp {}", sust.clamp.Value());
}
if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
throw NotImplementedException("Cache {}", sust.cache.Value());
}
const bool is_typed{sust.d != 0};
if (is_typed && sust.ba != 0) {
throw NotImplementedException("BA");
}
const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
const TextureType type{GetType(sust.type)};
const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
: X(sust.bindless_reg)};
IR::TextureInstInfo info{};
info.type.Assign(type);
info.image_format.Assign(format);
IR::Value color;
if (is_typed) {
color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
} else {
const unsigned mask{SwizzleMask(sust.swizzle)};
if (mask != 0xf) {
throw NotImplementedException("Non-full mask");
}
color = MakeColor(ir, sust.data_reg, 4);
}
ir.ImageWrite(handle, coords, color, info);
}
} // namespace Shader::Maxwell