summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
m---------externals/dynarmic0
-rw-r--r--src/audio_core/renderer/command/resample/upsample.cpp97
-rw-r--r--src/common/settings.cpp12
-rw-r--r--src/common/settings.h13
-rw-r--r--src/core/core_timing.cpp42
-rw-r--r--src/core/core_timing.h9
-rw-r--r--src/core/hle/kernel/k_hardware_timer.cpp6
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.cpp8
-rw-r--r--src/core/hle/service/nvflinger/nvflinger.h2
-rw-r--r--src/core/memory.cpp4
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/draw_manager.cpp31
-rw-r--r--src/video_core/engines/draw_manager.h20
-rw-r--r--src/video_core/engines/maxwell_3d.cpp3
-rw-r--r--src/video_core/engines/maxwell_3d.h16
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt2
-rw-r--r--src/video_core/host_shaders/blit_color_float.frag (renamed from src/video_core/host_shaders/vulkan_blit_color_float.frag)0
-rw-r--r--src/video_core/host_shaders/full_screen_triangle.vert13
-rw-r--r--src/video_core/rasterizer_interface.h3
-rw-r--r--src/video_core/renderer_null/null_rasterizer.cpp1
-rw-r--r--src/video_core/renderer_null/null_rasterizer.h1
-rw-r--r--src/video_core/renderer_opengl/blit_image.cpp59
-rw-r--r--src/video_core/renderer_opengl/blit_image.h38
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_device.h7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp44
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp121
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h129
-rw-r--r--src/video_core/renderer_vulkan/blit_image.cpp88
-rw-r--r--src/video_core/renderer_vulkan/blit_image.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp29
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/texture_cache/texture_cache.h7
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h3
-rw-r--r--src/yuzu/configuration/configure_graphics.ui15
36 files changed, 613 insertions, 224 deletions
diff --git a/externals/dynarmic b/externals/dynarmic
-Subproject a1cbea7948372989218a4e6159a95998d65876a
+Subproject befe547d5631024a70d81d2ccee808bbfcb3854
diff --git a/src/audio_core/renderer/command/resample/upsample.cpp b/src/audio_core/renderer/command/resample/upsample.cpp
index 6c3ff31f7..5f7db12ca 100644
--- a/src/audio_core/renderer/command/resample/upsample.cpp
+++ b/src/audio_core/renderer/command/resample/upsample.cpp
@@ -20,25 +20,25 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
const u32 target_sample_count, const u32 source_sample_count,
UpsamplerState* state) {
constexpr u32 WindowSize = 10;
- constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow1{
- 51.93359375f, -18.80078125f, 9.73046875f, -5.33203125f, 2.84375f,
- -1.41015625f, 0.62109375f, -0.2265625f, 0.0625f, -0.00390625f,
+ constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc1{
+ 0.95376587f, -0.12872314f, 0.060028076f, -0.032470703f, 0.017669678f,
+ -0.009124756f, 0.004272461f, -0.001739502f, 0.000579834f, -0.000091552734f,
};
- constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow2{
- 105.35546875f, -24.52734375f, 11.9609375f, -6.515625f, 3.52734375f,
- -1.796875f, 0.828125f, -0.32421875f, 0.1015625f, -0.015625f,
+ constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc2{
+ 0.8230896f, -0.19161987f, 0.093444824f, -0.05090332f, 0.027557373f,
+ -0.014038086f, 0.0064697266f, -0.002532959f, 0.00079345703f, -0.00012207031f,
};
- constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow3{
- 122.08203125f, -16.47656250f, 7.68359375f, -4.15625000f, 2.26171875f,
- -1.16796875f, 0.54687500f, -0.22265625f, 0.07421875f, -0.01171875f,
+ constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc3{
+ 0.6298828f, -0.19274902f, 0.09725952f, -0.05319214f, 0.028625488f,
+ -0.014373779f, 0.006500244f, -0.0024719238f, 0.0007324219f, -0.000091552734f,
};
- constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow4{
- 23.73437500f, -9.62109375f, 5.07812500f, -2.78125000f, 1.46875000f,
- -0.71484375f, 0.30859375f, -0.10546875f, 0.02734375f, 0.00000000f,
+ constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc4{
+ 0.4057312f, -0.1468811f, 0.07601929f, -0.041656494f, 0.022216797f,
+ -0.011016846f, 0.004852295f, -0.0017700195f, 0.00048828125f, -0.000030517578f,
};
- constexpr std::array<Common::FixedPoint<24, 8>, WindowSize> SincWindow5{
- 80.62500000f, -24.67187500f, 12.44921875f, -6.80859375f, 3.66406250f,
- -1.83984375f, 0.83203125f, -0.31640625f, 0.09375000f, -0.01171875f,
+ constexpr std::array<Common::FixedPoint<17, 15>, WindowSize> WindowedSinc5{
+ 0.1854248f, -0.075164795f, 0.03967285f, -0.021728516f, 0.011474609f,
+ -0.005584717f, 0.0024108887f, -0.0008239746f, 0.00021362305f, 0.0f,
};
if (!state->initialized) {
@@ -91,52 +91,31 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize);
};
- auto calculate_sample = [&state](std::span<const Common::FixedPoint<24, 8>> coeffs1,
- std::span<const Common::FixedPoint<24, 8>> coeffs2) -> s32 {
+ auto calculate_sample = [&state](std::span<const Common::FixedPoint<17, 15>> coeffs1, // coeffs1 weights samples walking backwards from history_output_index
+ std::span<const Common::FixedPoint<17, 15>> coeffs2) -> s32 { // coeffs2 weights samples walking forwards from the following slot
auto output_index{state->history_output_index};
- auto start_pos{output_index - state->history_start_index + 1U};
- auto end_pos{10U};
+ u64 result{0}; // one accumulator of raw products shared by both passes
- if (start_pos < 10) {
- end_pos = start_pos;
- }
-
- u64 prev_contrib{0};
- u32 coeff_index{0};
- for (; coeff_index < end_pos; coeff_index++, output_index--) {
- prev_contrib += static_cast<u64>(state->history[output_index].to_raw()) *
- coeffs1[coeff_index].to_raw();
- }
+ for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) { // NOTE(review): literal 10 presumably mirrors WindowSize — confirm
+ result += static_cast<u64>(state->history[output_index].to_raw()) *
+ coeffs1[coeff_index].to_raw();
- auto end_index{state->history_end_index};
- for (; start_pos < 9; start_pos++, coeff_index++, end_index--) {
- prev_contrib += static_cast<u64>(state->history[end_index].to_raw()) *
- coeffs1[coeff_index].to_raw();
+ output_index = output_index == state->history_start_index ? state->history_end_index
+ : output_index - 1; // step back one slot, wrapping from history_start_index to history_end_index
}
output_index =
static_cast<u16>((state->history_output_index + 1) % UpsamplerState::HistorySize);
- start_pos = state->history_end_index - output_index + 1U;
- end_pos = 10U;
- if (start_pos < 10) {
- end_pos = start_pos;
- }
-
- u64 next_contrib{0};
- coeff_index = 0;
- for (; coeff_index < end_pos; coeff_index++, output_index++) {
- next_contrib += static_cast<u64>(state->history[output_index].to_raw()) *
- coeffs2[coeff_index].to_raw();
- }
+ for (u32 coeff_index = 0; coeff_index < 10; coeff_index++) { // second pass: same tap count, moving forwards
+ result += static_cast<u64>(state->history[output_index].to_raw()) *
+ coeffs2[coeff_index].to_raw();
- auto start_index{state->history_start_index};
- for (; start_pos < 9; start_pos++, start_index++, coeff_index++) {
- next_contrib += static_cast<u64>(state->history[start_index].to_raw()) *
- coeffs2[coeff_index].to_raw();
+ output_index = output_index == state->history_end_index ? state->history_start_index
+ : output_index + 1; // step forward one slot, wrapping from history_end_index to history_start_index
}
- return static_cast<s32>(((prev_contrib >> 15) + (next_contrib >> 15)) >> 8);
+ return static_cast<s32>(result >> (8 + 15)); // drops 23 low bits of the summed raw products; presumably 15 coefficient + 8 history fraction bits — confirm
};
switch (state->ratio.to_int_floor()) {
@@ -150,23 +129,23 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
break;
case 1:
- output[write_index] = calculate_sample(SincWindow3, SincWindow4);
+ output[write_index] = calculate_sample(WindowedSinc1, WindowedSinc5);
break;
case 2:
- output[write_index] = calculate_sample(SincWindow2, SincWindow1);
+ output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
break;
case 3:
- output[write_index] = calculate_sample(SincWindow5, SincWindow5);
+ output[write_index] = calculate_sample(WindowedSinc3, WindowedSinc3);
break;
case 4:
- output[write_index] = calculate_sample(SincWindow1, SincWindow2);
+ output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
break;
case 5:
- output[write_index] = calculate_sample(SincWindow4, SincWindow3);
+ output[write_index] = calculate_sample(WindowedSinc5, WindowedSinc1);
break;
}
state->sample_index = static_cast<u8>((state->sample_index + 1) % 6);
@@ -183,11 +162,11 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
break;
case 1:
- output[write_index] = calculate_sample(SincWindow2, SincWindow1);
+ output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
break;
case 2:
- output[write_index] = calculate_sample(SincWindow1, SincWindow2);
+ output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
break;
}
state->sample_index = static_cast<u8>((state->sample_index + 1) % 3);
@@ -204,12 +183,12 @@ static void SrcProcessFrame(std::span<s32> output, std::span<const s32> input,
break;
case 1:
- output[write_index] = calculate_sample(SincWindow1, SincWindow2);
+ output[write_index] = calculate_sample(WindowedSinc4, WindowedSinc2);
break;
case 2:
increment();
- output[write_index] = calculate_sample(SincWindow2, SincWindow1);
+ output[write_index] = calculate_sample(WindowedSinc2, WindowedSinc4);
break;
}
state->sample_index = static_cast<u8>((state->sample_index + 1) % 3);
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 1638b79f5..b1a2aa8b2 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -129,6 +129,10 @@ void UpdateRescalingInfo() {
info.up_scale = 1;
info.down_shift = 0;
break;
+ case ResolutionSetup::Res3_2X:
+ info.up_scale = 3;
+ info.down_shift = 1;
+ break;
case ResolutionSetup::Res2X:
info.up_scale = 2;
info.down_shift = 0;
@@ -149,6 +153,14 @@ void UpdateRescalingInfo() {
info.up_scale = 6;
info.down_shift = 0;
break;
+ case ResolutionSetup::Res7X:
+ info.up_scale = 7;
+ info.down_shift = 0;
+ break;
+ case ResolutionSetup::Res8X:
+ info.up_scale = 8;
+ info.down_shift = 0;
+ break;
default:
ASSERT(false);
info.up_scale = 1;
diff --git a/src/common/settings.h b/src/common/settings.h
index a457e3f23..80b2eeabc 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -56,11 +56,14 @@ enum class ResolutionSetup : u32 {
Res1_2X = 0,
Res3_4X = 1,
Res1X = 2,
- Res2X = 3,
- Res3X = 4,
- Res4X = 5,
- Res5X = 6,
- Res6X = 7,
+ Res3_2X = 3,
+ Res2X = 4,
+ Res3X = 5,
+ Res4X = 6,
+ Res5X = 7,
+ Res6X = 8,
+ Res7X = 9,
+ Res8X = 10,
};
enum class ScalingFilter : u32 {
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index 0e7b5f943..6bac6722f 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -142,16 +142,24 @@ void CoreTiming::ScheduleLoopingEvent(std::chrono::nanoseconds start_time,
}
void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type, // Removes queued events matching (event_type, user_data); optionally waits on advance_lock afterwards
- std::uintptr_t user_data) {
- std::scoped_lock scope{basic_lock};
- const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
- return e.type.lock().get() == event_type.get() && e.user_data == user_data;
- });
-
- // Removing random items breaks the invariant so we have to re-establish it.
- if (itr != event_queue.end()) {
- event_queue.erase(itr, event_queue.end());
- std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+ std::uintptr_t user_data, bool wait) {
+ { // inner scope: basic_lock is released before advance_lock is taken below
+ std::scoped_lock lk{basic_lock};
+ const auto itr =
+ std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
+ return e.type.lock().get() == event_type.get() && e.user_data == user_data;
+ });
+
+ // Removing random items breaks the invariant so we have to re-establish it.
+ if (itr != event_queue.end()) {
+ event_queue.erase(itr, event_queue.end());
+ std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
+ }
+ }
+
+ // Force any in-progress events to finish
+ if (wait) { // NOTE(review): presumably must be false when called from inside an event callback — confirm
+ std::scoped_lock lk{advance_lock}; // lock and immediately unlock: blocks until a running Advance(), which holds advance_lock, has returned
}
}
@@ -190,20 +198,6 @@ u64 CoreTiming::GetClockTicks() const {
return CpuCyclesToClockCycles(ticks);
}
-void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
- std::scoped_lock lock{basic_lock};
-
- const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
- return e.type.lock().get() == event_type.get();
- });
-
- // Removing random items breaks the invariant so we have to re-establish it.
- if (itr != event_queue.end()) {
- event_queue.erase(itr, event_queue.end());
- std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
- }
-}
-
std::optional<s64> CoreTiming::Advance() {
std::scoped_lock lock{advance_lock, basic_lock};
global_timer = GetGlobalTimeNs().count();
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index b5925193c..da366637b 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -98,10 +98,13 @@ public:
const std::shared_ptr<EventType>& event_type,
std::uintptr_t user_data = 0, bool absolute_time = false);
- void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data);
+ void UnscheduleEvent(const std::shared_ptr<EventType>& event_type, std::uintptr_t user_data,
+ bool wait = true);
- /// We only permit one event of each type in the queue at a time.
- void RemoveEvent(const std::shared_ptr<EventType>& event_type);
+ void UnscheduleEventWithoutWait(const std::shared_ptr<EventType>& event_type, // Convenience wrapper: UnscheduleEvent with wait = false
+ std::uintptr_t user_data) {
+ UnscheduleEvent(event_type, user_data, false); // skips the advance_lock acquisition that wait = true performs
+ }
void AddTicks(u64 ticks_to_add);
diff --git a/src/core/hle/kernel/k_hardware_timer.cpp b/src/core/hle/kernel/k_hardware_timer.cpp
index 6bba79ea0..4dcd53821 100644
--- a/src/core/hle/kernel/k_hardware_timer.cpp
+++ b/src/core/hle/kernel/k_hardware_timer.cpp
@@ -18,7 +18,8 @@ void KHardwareTimer::Initialize() {
}
void KHardwareTimer::Finalize() {
- this->DisableInterrupt();
+ m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this)); // wait defaults to true here, unlike DisableInterrupt(); presumably so no callback is in flight at reset() — confirm
+ m_wakeup_time = std::numeric_limits<s64>::max(); // same value DisableInterrupt() stores
m_event_type.reset();
}
@@ -59,7 +60,8 @@ void KHardwareTimer::EnableInterrupt(s64 wakeup_time) {
}
void KHardwareTimer::DisableInterrupt() {
- m_kernel.System().CoreTiming().UnscheduleEvent(m_event_type, reinterpret_cast<uintptr_t>(this));
+ m_kernel.System().CoreTiming().UnscheduleEventWithoutWait(m_event_type, // non-waiting variant: removes the queued event without taking advance_lock
+ reinterpret_cast<uintptr_t>(this));
m_wakeup_time = std::numeric_limits<s64>::max();
}
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index d1cbadde4..f4416f5b2 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -312,8 +312,6 @@ void NVFlinger::Compose() {
}
s64 NVFlinger::GetNextTicks() const {
- static constexpr s64 max_hertz = 120LL;
-
const auto& settings = Settings::values;
auto speed_scale = 1.f;
if (settings.use_multi_core.GetValue()) {
@@ -327,9 +325,11 @@ s64 NVFlinger::GetNextTicks() const {
}
}
- const auto next_ticks = ((1000000000 * (1LL << swap_interval)) / max_hertz);
+ // As an extension, treat nonpositive swap interval as framerate multiplier.
+ const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval)
+ : 60.f / static_cast<f32>(swap_interval);
- return static_cast<s64>(speed_scale * static_cast<float>(next_ticks));
+ return static_cast<s64>(speed_scale * (1000000000.f / effective_fps));
}
} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index 9b22397db..3828cf272 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -133,7 +133,7 @@ private:
/// layers.
u32 next_buffer_queue_id = 1;
- u32 swap_interval = 1;
+ s32 swap_interval = 1;
/// Event that handles screen composition.
std::shared_ptr<Core::Timing::EventType> multi_composition_event;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index a1e41faff..4e605fae4 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -383,6 +383,10 @@ struct Memory::Impl {
return;
}
+ if (Settings::IsFastmemEnabled()) {
+ system.DeviceMemory().buffer.Protect(vaddr, size, !debug, !debug);
+ }
+
// Iterate over a contiguous CPU address space, marking/unmarking the region.
// The region is at a granularity of CPU pages.
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index f617665de..b474eb363 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -100,6 +100,8 @@ add_library(video_core STATIC
renderer_null/null_rasterizer.h
renderer_null/renderer_null.cpp
renderer_null/renderer_null.h
+ renderer_opengl/blit_image.cpp
+ renderer_opengl/blit_image.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_compute_pipeline.cpp
diff --git a/src/video_core/engines/draw_manager.cpp b/src/video_core/engines/draw_manager.cpp
index 2437121ce..1d22d25f1 100644
--- a/src/video_core/engines/draw_manager.cpp
+++ b/src/video_core/engines/draw_manager.cpp
@@ -51,6 +51,10 @@ void DrawManager::ProcessMethodCall(u32 method, u32 argument) {
LOG_WARNING(HW_GPU, "(STUBBED) called");
break;
}
+ case MAXWELL3D_REG_INDEX(draw_texture.src_y0): {
+ DrawTexture();
+ break;
+ }
default:
break;
}
@@ -179,6 +183,33 @@ void DrawManager::DrawIndexSmall(u32 argument) {
ProcessDraw(true, 1);
}
+void DrawManager::DrawTexture() { // Fills draw_texture_state from the draw_texture registers, then calls the rasterizer
+ const auto& regs{maxwell3d->regs};
+ draw_texture_state.dst_x0 = static_cast<float>(regs.draw_texture.dst_x0) / 4096.f; // raw value * 1/4096 (presumably 12 fractional bits — confirm)
+ draw_texture_state.dst_y0 = static_cast<float>(regs.draw_texture.dst_y0) / 4096.f;
+ const auto dst_width = static_cast<float>(regs.draw_texture.dst_width) / 4096.f;
+ const auto dst_height = static_cast<float>(regs.draw_texture.dst_height) / 4096.f;
+ const bool lower_left{regs.window_origin.mode !=
+ Maxwell3D::Regs::WindowOrigin::Mode::UpperLeft}; // every mode other than UpperLeft is handled as lower-left
+ if (lower_left) {
+ draw_texture_state.dst_y0 -= dst_height; // shift the start so dst_y1 (computed below) lands on the programmed y
+ }
+ draw_texture_state.dst_x1 = draw_texture_state.dst_x0 + dst_width;
+ draw_texture_state.dst_y1 = draw_texture_state.dst_y0 + dst_height;
+ draw_texture_state.src_x0 = static_cast<float>(regs.draw_texture.src_x0) / 4096.f;
+ draw_texture_state.src_y0 = static_cast<float>(regs.draw_texture.src_y0) / 4096.f;
+ draw_texture_state.src_x1 =
+ (static_cast<float>(regs.draw_texture.dx_du) / 4294967296.f) * dst_width + // dx_du * 1/2^32 acts as source step per destination unit (presumably 32.32 fixed point — confirm)
+ draw_texture_state.src_x0;
+ draw_texture_state.src_y1 =
+ (static_cast<float>(regs.draw_texture.dy_dv) / 4294967296.f) * dst_height + // same scaling for the vertical step
+ draw_texture_state.src_y0;
+ draw_texture_state.src_sampler = regs.draw_texture.src_sampler; // copied unchanged
+ draw_texture_state.src_texture = regs.draw_texture.src_texture; // copied unchanged
+
+ maxwell3d->rasterizer->DrawTexture(); // takes no arguments; the state filled above is exposed via GetDrawTextureState()
+}
+
void DrawManager::UpdateTopology() {
const auto& regs{maxwell3d->regs};
switch (regs.primitive_topology_control) {
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 58d1b2d59..7c22c49f1 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -32,6 +32,19 @@ public:
std::vector<u8> inline_index_draw_indexes;
};
+ struct DrawTextureState { // Float-converted draw_texture parameters, written by DrawManager::DrawTexture()
+ f32 dst_x0; // destination start, register value / 4096
+ f32 dst_y0; // lowered by the height when the window origin is not UpperLeft
+ f32 dst_x1; // dst_x0 + width
+ f32 dst_y1; // dst_y0 + height
+ f32 src_x0; // source start, register value / 4096
+ f32 src_y0;
+ f32 src_x1; // src_x0 + (dx_du / 2^32) * destination width
+ f32 src_y1; // src_y0 + (dy_dv / 2^32) * destination height
+ u32 src_sampler; // copied from regs.draw_texture.src_sampler
+ u32 src_texture; // copied from regs.draw_texture.src_texture
+ };
+
struct IndirectParams {
bool is_indexed;
bool include_count;
@@ -64,6 +77,10 @@ public:
return draw_state;
}
+ const DrawTextureState& GetDrawTextureState() const {
+ return draw_texture_state;
+ }
+
IndirectParams& GetIndirectParams() {
return indirect_state;
}
@@ -81,6 +98,8 @@ private:
void DrawIndexSmall(u32 argument);
+ void DrawTexture();
+
void UpdateTopology();
void ProcessDraw(bool draw_indexed, u32 instance_count);
@@ -89,6 +108,7 @@ private:
Maxwell3D* maxwell3d{};
State draw_state{};
+ DrawTextureState draw_texture_state{};
IndirectParams indirect_state{};
};
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 97f547789..ae9da6290 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -149,6 +149,7 @@ bool Maxwell3D::IsMethodExecutable(u32 method) {
case MAXWELL3D_REG_INDEX(inline_index_4x8.index0):
case MAXWELL3D_REG_INDEX(vertex_array_instance_first):
case MAXWELL3D_REG_INDEX(vertex_array_instance_subsequent):
+ case MAXWELL3D_REG_INDEX(draw_texture.src_y0):
case MAXWELL3D_REG_INDEX(wait_for_idle):
case MAXWELL3D_REG_INDEX(shadow_ram_control):
case MAXWELL3D_REG_INDEX(load_mme.instruction_ptr):
@@ -467,7 +468,7 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
}
void Maxwell3D::ProcessFirmwareCall4() {
- LOG_WARNING(HW_GPU, "(STUBBED) called");
+ LOG_DEBUG(HW_GPU, "(STUBBED) called");
// Firmware call 4 is a blob that changes some registers depending on its parameters.
// These registers don't affect emulation and so are stubbed by setting 0xd00 to 1.
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0b2fd2928..c89969bb4 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1599,6 +1599,20 @@ public:
};
static_assert(sizeof(TIRModulationCoeff) == 0x4);
+ struct DrawTexture { // Register block that takes the place of 0x30 padding bytes in Regs (annotated 0x1080 there)
+ s32 dst_x0; // DrawManager divides the dst_* and src_x0/src_y0 values by 4096
+ s32 dst_y0;
+ s32 dst_width;
+ s32 dst_height;
+ s64 dx_du; // DrawManager divides this by 2^32 and multiplies by the destination width
+ s64 dy_dv; // DrawManager divides this by 2^32 and multiplies by the destination height
+ u32 src_sampler;
+ u32 src_texture;
+ s32 src_x0;
+ s32 src_y0; // writes to this register trigger DrawManager::DrawTexture()
+ };
+ static_assert(sizeof(DrawTexture) == 0x30); // must equal the size of the padding it replaces so later register offsets do not move
+
struct ReduceColorThreshold {
union {
BitField<0, 8, u32> all_hit_once;
@@ -2751,7 +2765,7 @@ public:
u32 reserved_sw_method2; ///< 0x102C
std::array<TIRModulationCoeff, 5> tir_modulation_coeff; ///< 0x1030
std::array<u32, 15> spare_nop; ///< 0x1044
- INSERT_PADDING_BYTES_NOINIT(0x30);
+ DrawTexture draw_texture; ///< 0x1080
std::array<u32, 7> reserved_sw_method3_to_7; ///< 0x10B0
ReduceColorThreshold reduce_color_thresholds_unorm8; ///< 0x10CC
std::array<u32, 4> reserved_sw_method10_to_13; ///< 0x10D0
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index f275b2aa9..e968ae220 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -11,6 +11,7 @@ set(GLSL_INCLUDES
set(SHADER_FILES
astc_decoder.comp
+ blit_color_float.frag
block_linear_unswizzle_2d.comp
block_linear_unswizzle_3d.comp
convert_abgr8_to_d24s8.frag
@@ -36,7 +37,6 @@ set(SHADER_FILES
smaa_blending_weight_calculation.frag
smaa_neighborhood_blending.vert
smaa_neighborhood_blending.frag
- vulkan_blit_color_float.frag
vulkan_blit_depth_stencil.frag
vulkan_fidelityfx_fsr_easu_fp16.comp
vulkan_fidelityfx_fsr_easu_fp32.comp
diff --git a/src/video_core/host_shaders/vulkan_blit_color_float.frag b/src/video_core/host_shaders/blit_color_float.frag
index c0c832296..c0c832296 100644
--- a/src/video_core/host_shaders/vulkan_blit_color_float.frag
+++ b/src/video_core/host_shaders/blit_color_float.frag
diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert
index 2c976b19f..d16d98995 100644
--- a/src/video_core/host_shaders/full_screen_triangle.vert
+++ b/src/video_core/host_shaders/full_screen_triangle.vert
@@ -4,13 +4,20 @@
#version 450
#ifdef VULKAN
+#define VERTEX_ID gl_VertexIndex
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
+#define FLIPY 1
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
+#define VERTEX_ID gl_VertexID
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
+#define FLIPY -1
#define UNIFORM(n) layout (location = n) uniform
+out gl_PerVertex {
+ vec4 gl_Position;
+};
#endif
BEGIN_PUSH_CONSTANTS
@@ -21,8 +28,8 @@ END_PUSH_CONSTANTS
layout(location = 0) out vec2 texcoord;
void main() {
- float x = float((gl_VertexIndex & 1) << 2);
- float y = float((gl_VertexIndex & 2) << 1);
- gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
+ float x = float((VERTEX_ID & 1) << 2); // VERTEX_ID is gl_VertexIndex under VULKAN, gl_VertexID otherwise; x is 0 or 4
+ float y = float((VERTEX_ID & 2) << 1); // y is 0 or 4
+ gl_Position = vec4(x - 1.0, FLIPY * (y - 1.0), 0.0, 1.0); // FLIPY is 1 under VULKAN and -1 otherwise, mirroring Y in the OpenGL build
texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
}
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1735b6164..33e2610bc 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -47,6 +47,9 @@ public:
/// Dispatches an indirect draw invocation
virtual void DrawIndirect() {}
+ /// Dispatches a draw texture invocation
+ virtual void DrawTexture() = 0;
+
/// Clear the current framebuffer
virtual void Clear(u32 layer_count) = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index 2c11345d7..2b5c7defa 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -21,6 +21,7 @@ RasterizerNull::RasterizerNull(Core::Memory::Memory& cpu_memory_, Tegra::GPU& gp
RasterizerNull::~RasterizerNull() = default;
void RasterizerNull::Draw(bool is_indexed, u32 instance_count) {}
+void RasterizerNull::DrawTexture() {}
void RasterizerNull::Clear(u32 layer_count) {}
void RasterizerNull::DispatchCompute() {}
void RasterizerNull::ResetCounter(VideoCore::QueryType type) {}
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index 2112aa70e..51f896e43 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -31,6 +31,7 @@ public:
~RasterizerNull() override;
void Draw(bool is_indexed, u32 instance_count) override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/renderer_opengl/blit_image.cpp b/src/video_core/renderer_opengl/blit_image.cpp
new file mode 100644
index 000000000..9a560a73b
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.cpp
@@ -0,0 +1,59 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <algorithm>
+
+#include "video_core/host_shaders/blit_color_float_frag.h"
+#include "video_core/host_shaders/full_screen_triangle_vert.h"
+#include "video_core/renderer_opengl/blit_image.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"
+
+namespace OpenGL {
+
+BlitImageHelper::BlitImageHelper(ProgramManager& program_manager_) // Stores the program manager reference and creates both blit programs up front
+ : program_manager(program_manager_),
+ full_screen_vert(CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER)), // vertex stage shared host shader
+ blit_color_to_color_frag(
+ CreateProgram(HostShaders::BLIT_COLOR_FLOAT_FRAG, GL_FRAGMENT_SHADER)) {} // fragment stage from the renamed blit_color_float.frag
+
+BlitImageHelper::~BlitImageHelper() = default;
+
+void BlitImageHelper::BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, // Draws one 3-vertex triangle with the blit programs into dst_framebuffer, sampling src_image_view
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size) {
+ glEnable(GL_CULL_FACE); // the state below is set directly through GL and is not restored in this function
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisable(GL_ALPHA_TEST);
+ glDisablei(GL_BLEND, 0);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glCullFace(GL_BACK);
+ glFrontFace(GL_CW);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glDepthRangeIndexed(0, 0.0, 0.0);
+
+ program_manager.BindPresentPrograms(full_screen_vert.handle, blit_color_to_color_frag.handle);
+ glProgramUniform2f(full_screen_vert.handle, 0, // location 0: source region size / texture size (presumably tex_scale — confirm)
+ static_cast<float>(src_region.end.x - src_region.start.x) /
+ static_cast<float>(src_size.width),
+ static_cast<float>(src_region.end.y - src_region.start.y) /
+ static_cast<float>(src_size.height));
+ glProgramUniform2f(full_screen_vert.handle, 1, // location 1: source region start / texture size (presumably tex_offset — confirm)
+ static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+ static_cast<float>(src_region.start.y) /
+ static_cast<float>(src_size.height));
+ glViewport(std::min(dst_region.start.x, dst_region.end.x), // min/abs keep origin and size valid when start > end
+ std::min(dst_region.start.y, dst_region.end.y),
+ std::abs(dst_region.end.x - dst_region.start.x),
+ std::abs(dst_region.end.y - dst_region.start.y));
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer);
+ glBindSampler(0, src_sampler);
+ glBindTextureUnit(0, src_image_view);
+ glClear(GL_COLOR_BUFFER_BIT); // NOTE(review): glClear is not bounded by the viewport, so this may clear pixels outside dst_region — confirm intended
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/blit_image.h b/src/video_core/renderer_opengl/blit_image.h
new file mode 100644
index 000000000..5a2b12d16
--- /dev/null
+++ b/src/video_core/renderer_opengl/blit_image.h
@@ -0,0 +1,38 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/texture_cache/types.h"
+
+namespace OpenGL {
+
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
+using VideoCommon::Region2D;
+
+class ProgramManager;
+class Framebuffer;
+class ImageView;
+
+class BlitImageHelper { // Owns the two GL programs used to blit a colour texture with a shader draw
+public:
+ explicit BlitImageHelper(ProgramManager& program_manager);
+ ~BlitImageHelper();
+
+ void BlitColor(GLuint dst_framebuffer, GLuint src_image_view, GLuint src_sampler, // all three are raw GL object names
+ const Region2D& dst_region, const Region2D& src_region, // dst_region sets the viewport; src_region is divided by src_size for the shader uniforms
+ const Extent3D& src_size);
+
+private:
+ ProgramManager& program_manager; // non-owning reference; used to bind the programs in BlitColor
+
+ OGLProgram full_screen_vert; // built from FULL_SCREEN_TRIANGLE_VERT
+ OGLProgram blit_color_to_color_frag; // built from BLIT_COLOR_FLOAT_FRAG
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index cee5c3247..22ed16ebf 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -166,6 +166,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
has_shader_int64 = HasExtension(extensions, "GL_ARB_gpu_shader_int64");
has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float;
has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2;
+ has_draw_texture = GLAD_GL_NV_draw_texture;
warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel;
need_fastmath_off = is_nvidia;
can_report_memory = GLAD_GL_NVX_gpu_memory_info;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index 2a72d84be..3ff8cad83 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -4,6 +4,8 @@
#pragma once
#include <cstddef>
+#include <string>
+
#include "common/common_types.h"
#include "core/frontend/emu_window.h"
#include "shader_recompiler/stage.h"
@@ -146,6 +148,10 @@ public:
return has_sparse_texture_2;
}
+ bool HasDrawTexture() const {
+ return has_draw_texture;
+ }
+
bool IsWarpSizePotentiallyLargerThanGuest() const {
return warp_size_potentially_larger_than_guest;
}
@@ -216,6 +222,7 @@ private:
bool has_shader_int64{};
bool has_amd_shader_half_float{};
bool has_sparse_texture_2{};
+ bool has_draw_texture{};
bool warp_size_potentially_larger_than_guest{};
bool need_fastmath_off{};
bool has_cbuf_ftou_bug{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 181857d9c..7bced675c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -64,7 +64,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()),
query_cache(*this), accelerate_dma(buffer_cache),
- fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ blit_image(program_manager_) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -320,6 +321,47 @@ void RasterizerOpenGL::DrawIndirect() {
buffer_cache.SetDrawIndirect(nullptr);
}
+void RasterizerOpenGL::DrawTexture() { // Draws the guest's current draw-texture state into the current framebuffer.
+ MICROPROFILE_SCOPE(OpenGL_Drawing);
+ // SCOPE_EXIT defers gpu.TickWork() (presumably until this function's scope ends).
+ SCOPE_EXIT({ gpu.TickWork(); });
+ query_cache.UpdateCounters();
+ // Synchronize texture descriptors and render targets before reading the draw state.
+ texture_cache.SynchronizeGraphicsDescriptors();
+ texture_cache.UpdateRenderTargets(false);
+
+ SyncState();
+ // The draw state stores sampler/texture indices; resolve them through the texture cache.
+ const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+ const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+ const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+ // Prefer glDrawTextureNV when the device reports it; otherwise fall back to the blit helper.
+ if (device.HasDrawTexture()) {
+ state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
+ // Destination coordinates are passed as-is; source coordinates are divided by the texture size.
+ glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(), draw_texture_state.dst_x0,
+ draw_texture_state.dst_y0, draw_texture_state.dst_x1,
+ draw_texture_state.dst_y1, 0,
+ draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
+ draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
+ draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
+ draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
+ } else { // Fallback: cast the coordinates to s32 regions and hand them to blit_image.
+ Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0),
+ .y = static_cast<s32>(draw_texture_state.dst_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+ .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+ Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+ .y = static_cast<s32>(draw_texture_state.src_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+ .y = static_cast<s32>(draw_texture_state.src_y1)}};
+ blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
+ sampler->Handle(), dst_region, src_region, texture.size); // texture.size is passed along with the unnormalized regions.
+ }
+
+ ++num_queued_commands; // Counted on both paths.
+}
+
void RasterizerOpenGL::DispatchCompute() {
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index be4f76c18..0c45832ae 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -16,6 +16,7 @@
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
+#include "video_core/renderer_opengl/blit_image.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
@@ -70,6 +71,7 @@ public:
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
@@ -224,6 +226,8 @@ private:
AccelerateDMA accelerate_dma;
FenceManagerOpenGL fence_manager;
+ BlitImageHelper blit_image;
+
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index d9c29d8b7..98841ae65 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -1,2 +1,123 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+
+namespace OpenGL {
+
+static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{ // NV assembly program targets; indexed by stage in BindAssemblyPrograms.
+ GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
+ GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
+};
+
+ProgramManager::ProgramManager(const Device& device) { // Creates the program pipeline object stored in `pipeline`.
+ glCreateProgramPipelines(1, &pipeline.handle);
+ if (device.UseAssemblyShaders()) { // GL_COMPUTE_PROGRAM_NV is enabled once here when assembly shaders are in use.
+ glEnable(GL_COMPUTE_PROGRAM_NV);
+ }
+}
+
+void ProgramManager::BindComputeProgram(GLuint program) { // Makes `program` current through glUseProgram.
+ glUseProgram(program);
+ is_compute_bound = true; // Read by UnbindCompute() to decide whether glUseProgram(0) is needed.
+}
+
+void ProgramManager::BindComputeAssemblyProgram(GLuint program) { // Binds an assembly compute program to GL_COMPUTE_PROGRAM_NV.
+ if (current_assembly_compute_program != program) { // Skip the GL call when this program is already the cached binding.
+ current_assembly_compute_program = program;
+ glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
+ }
+ UnbindPipeline(); // Unbinds the program pipeline and, through UnbindCompute(), any glUseProgram binding.
+}
+
+void ProgramManager::BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) { // Attaches one program per stage to the pipeline, then binds the pipeline.
+ static constexpr std::array<GLenum, 5> stage_enums{ // Stage bits, indexed in parallel with `programs`.
+ GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
+ GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
+ };
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) { // Only update stages whose cached handle differs.
+ current_programs[stage] = programs[stage].handle;
+ glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
+ }
+ }
+ BindPipeline();
+}
+
+void ProgramManager::BindPresentPrograms(GLuint vertex, GLuint fragment) { // Binds a vertex + fragment only pipeline.
+ if (current_programs[0] != vertex) { // Slot 0 caches the vertex stage handle.
+ current_programs[0] = vertex;
+ glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
+ }
+ if (current_programs[4] != fragment) { // Slot 4 caches the fragment stage handle.
+ current_programs[4] = fragment;
+ glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
+ }
+ glUseProgramStages(
+ pipeline.handle,
+ GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0); // Program 0 for the tessellation and geometry stages; issued unconditionally.
+ current_programs[1] = 0;
+ current_programs[2] = 0;
+ current_programs[3] = 0;
+ // A non-zero mask means BindAssemblyPrograms left some targets enabled; disable all of them.
+ if (current_stage_mask != 0) {
+ current_stage_mask = 0;
+ for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
+ glDisable(program_type);
+ }
+ }
+ BindPipeline();
+}
+
+void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
+ u32 stage_mask) { // Binds per-stage assembly programs; bit N of stage_mask enables stage N.
+ const u32 changed_mask = current_stage_mask ^ stage_mask; // Bits that differ from the previously applied mask.
+ current_stage_mask = stage_mask;
+ // Toggle only the program targets whose enable bit flipped.
+ if (changed_mask != 0) {
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (((changed_mask >> stage) & 1) != 0) {
+ if (((stage_mask >> stage) & 1) != 0) {
+ glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ } else {
+ glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
+ }
+ }
+ }
+ }
+ for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
+ if (current_programs[stage] != programs[stage].handle) { // Rebind only when the cached handle differs.
+ current_programs[stage] = programs[stage].handle;
+ glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
+ }
+ }
+ UnbindPipeline(); // The program pipeline object is unbound on this path.
+}
+
+void ProgramManager::RestoreGuestCompute() {} // No-op: the body is empty.
+
+void ProgramManager::BindPipeline() { // Binds the program pipeline object if it is not already bound.
+ if (!is_pipeline_bound) {
+ is_pipeline_bound = true;
+ glBindProgramPipeline(pipeline.handle);
+ }
+ UnbindCompute(); // Always runs, so a glUseProgram compute binding is cleared as well.
+}
+
+void ProgramManager::UnbindPipeline() { // Unbinds the program pipeline object if it is currently bound.
+ if (is_pipeline_bound) {
+ is_pipeline_bound = false;
+ glBindProgramPipeline(0);
+ }
+ UnbindCompute(); // Always runs, so a glUseProgram compute binding is cleared as well.
+}
+
+void ProgramManager::UnbindCompute() { // Resets glUseProgram to 0 if BindComputeProgram set a program.
+ if (is_compute_bound) {
+ is_compute_bound = false;
+ glUseProgram(0);
+ }
+}
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index a84f5aeb3..07ffab77f 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -6,8 +6,6 @@
#include <array>
#include <span>
-#include <glad/glad.h>
-
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -16,121 +14,28 @@ namespace OpenGL {
class ProgramManager {
static constexpr size_t NUM_STAGES = 5;
- static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
- GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
- GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
- };
-
public:
- explicit ProgramManager(const Device& device) {
- glCreateProgramPipelines(1, &pipeline.handle);
- if (device.UseAssemblyShaders()) {
- glEnable(GL_COMPUTE_PROGRAM_NV);
- }
- }
-
- void BindComputeProgram(GLuint program) {
- glUseProgram(program);
- is_compute_bound = true;
- }
-
- void BindComputeAssemblyProgram(GLuint program) {
- if (current_assembly_compute_program != program) {
- current_assembly_compute_program = program;
- glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
- }
- UnbindPipeline();
- }
-
- void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
- static constexpr std::array<GLenum, 5> stage_enums{
- GL_VERTEX_SHADER_BIT, GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
- GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
- };
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (current_programs[stage] != programs[stage].handle) {
- current_programs[stage] = programs[stage].handle;
- glUseProgramStages(pipeline.handle, stage_enums[stage], programs[stage].handle);
- }
- }
- BindPipeline();
- }
-
- void BindPresentPrograms(GLuint vertex, GLuint fragment) {
- if (current_programs[0] != vertex) {
- current_programs[0] = vertex;
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex);
- }
- if (current_programs[4] != fragment) {
- current_programs[4] = fragment;
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment);
- }
- glUseProgramStages(
- pipeline.handle,
- GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT, 0);
- current_programs[1] = 0;
- current_programs[2] = 0;
- current_programs[3] = 0;
-
- if (current_stage_mask != 0) {
- current_stage_mask = 0;
- for (const GLenum program_type : ASSEMBLY_PROGRAM_ENUMS) {
- glDisable(program_type);
- }
- }
- BindPipeline();
- }
+ explicit ProgramManager(const Device& device);
+
+ void BindComputeProgram(GLuint program);
+
+ void BindComputeAssemblyProgram(GLuint program);
+
+ void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs);
+
+ void BindPresentPrograms(GLuint vertex, GLuint fragment);
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
- u32 stage_mask) {
- const u32 changed_mask = current_stage_mask ^ stage_mask;
- current_stage_mask = stage_mask;
-
- if (changed_mask != 0) {
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (((changed_mask >> stage) & 1) != 0) {
- if (((stage_mask >> stage) & 1) != 0) {
- glEnable(ASSEMBLY_PROGRAM_ENUMS[stage]);
- } else {
- glDisable(ASSEMBLY_PROGRAM_ENUMS[stage]);
- }
- }
- }
- }
- for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
- if (current_programs[stage] != programs[stage].handle) {
- current_programs[stage] = programs[stage].handle;
- glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
- }
- }
- UnbindPipeline();
- }
-
- void RestoreGuestCompute() {}
+ u32 stage_mask);
+
+ void RestoreGuestCompute();
private:
- void BindPipeline() {
- if (!is_pipeline_bound) {
- is_pipeline_bound = true;
- glBindProgramPipeline(pipeline.handle);
- }
- UnbindCompute();
- }
-
- void UnbindPipeline() {
- if (is_pipeline_bound) {
- is_pipeline_bound = false;
- glBindProgramPipeline(0);
- }
- UnbindCompute();
- }
-
- void UnbindCompute() {
- if (is_compute_bound) {
- is_compute_bound = false;
- glUseProgram(0);
- }
- }
+ void BindPipeline();
+
+ void UnbindPipeline();
+
+ void UnbindCompute();
OGLPipeline pipeline;
bool is_pipeline_bound{};
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 3f2b139e0..dd00d3edf 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -4,13 +4,13 @@
#include <algorithm>
#include "common/settings.h"
+#include "video_core/host_shaders/blit_color_float_frag_spv.h"
#include "video_core/host_shaders/convert_abgr8_to_d24s8_frag_spv.h"
#include "video_core/host_shaders/convert_d24s8_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
#include "video_core/host_shaders/convert_s8d24_to_abgr8_frag_spv.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
-#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
@@ -303,7 +303,7 @@ void UpdateTwoTexturesDescriptorSet(const Device& device, VkDescriptorSet descri
}
void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Region2D& dst_region,
- const Region2D& src_region) {
+ const Region2D& src_region, const Extent3D& src_size = {1, 1, 1}) {
const VkOffset2D offset{
.x = std::min(dst_region.start.x, dst_region.end.x),
.y = std::min(dst_region.start.y, dst_region.end.y),
@@ -325,12 +325,15 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
.offset = offset,
.extent = extent,
};
- const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x);
- const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y);
+ const float scale_x = static_cast<float>(src_region.end.x - src_region.start.x) /
+ static_cast<float>(src_size.width);
+ const float scale_y = static_cast<float>(src_region.end.y - src_region.start.y) /
+ static_cast<float>(src_size.height);
const PushConstants push_constants{
.tex_scale = {scale_x, scale_y},
- .tex_offset = {static_cast<float>(src_region.start.x),
- static_cast<float>(src_region.start.y)},
+ .tex_offset = {static_cast<float>(src_region.start.x) / static_cast<float>(src_size.width),
+ static_cast<float>(src_region.start.y) /
+ static_cast<float>(src_size.height)},
};
cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
@@ -347,6 +350,51 @@ VkExtent2D GetConversionExtent(const ImageView& src_image_view) {
.height = is_rescaled ? resolution.ScaleUp(height) : height,
};
}
+
+void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
+ VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { // Records a barrier moving `image` from source_layout to target_layout.
+ constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; // The same access mask is used as both source and destination.
+ const VkImageMemoryBarrier barrier{
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .pNext = nullptr,
+ .srcAccessMask = flags,
+ .dstAccessMask = flags,
+ .oldLayout = source_layout,
+ .newLayout = target_layout,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange{ // Covers only the color aspect of mip level 0, array layer 0.
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ };
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ 0, barrier); // ALL_COMMANDS is used for both the source and destination stage.
+}
+
+void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) { // Begins `framebuffer`'s render pass on cmdbuf with inline subpass contents.
+ const VkRenderPass render_pass = framebuffer->RenderPass();
+ const VkFramebuffer framebuffer_handle = framebuffer->Handle();
+ const VkExtent2D render_area = framebuffer->RenderArea();
+ const VkRenderPassBeginInfo renderpass_bi{
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .pNext = nullptr,
+ .renderPass = render_pass,
+ .framebuffer = framebuffer_handle,
+ .renderArea{ // Zero offset with the framebuffer's own render-area extent.
+ .offset{},
+ .extent = render_area,
+ },
+ .clearValueCount = 0, // No clear values are supplied.
+ .pClearValues = nullptr,
+ };
+ cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
+}
} // Anonymous namespace
BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
@@ -365,7 +413,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_,
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
- blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
+ blit_color_to_color_frag(BuildShader(device, BLIT_COLOR_FLOAT_FRAG_SPV)),
blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)),
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
@@ -404,6 +452,32 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
scheduler.InvalidateState();
}
+void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+ VkImage src_image, VkSampler src_sampler,
+ const Region2D& dst_region, const Region2D& src_region,
+ const Extent3D& src_size) { // Draws src_image_view into dst_framebuffer using the SrcCopy color pipeline.
+ const BlitImagePipelineKey key{
+ .renderpass = dst_framebuffer->RenderPass(),
+ .operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
+ };
+ const VkPipelineLayout layout = *one_texture_pipeline_layout;
+ const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
+ scheduler.RequestOutsideRenderPassOperationContext();
+ scheduler.Record([this, dst_framebuffer, src_image_view, src_image, src_sampler, dst_region,
+ src_region, src_size, pipeline, layout](vk::CommandBuffer cmdbuf) { // Everything below is recorded through the scheduler; arguments are captured by value.
+ TransitionImageLayout(cmdbuf, src_image, VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL); // Old layout is the helper's default, VK_IMAGE_LAYOUT_GENERAL.
+ BeginRenderPass(cmdbuf, dst_framebuffer);
+ const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
+ UpdateOneTextureDescriptorSet(device, descriptor_set, src_sampler, src_image_view);
+ cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
+ nullptr);
+ BindBlitState(cmdbuf, layout, dst_region, src_region, src_size); // src_size lets BindBlitState divide the source region by the texture size.
+ cmdbuf.Draw(3, 1, 0, 0); // Three vertices, one instance (presumably the full-screen triangle; confirm against the pipeline).
+ cmdbuf.EndRenderPass();
+ });
+}
+
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
VkImageView src_depth_view, VkImageView src_stencil_view,
const Region2D& dst_region, const Region2D& src_region,
diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h
index 5df679fb4..be8a9a2f6 100644
--- a/src/video_core/renderer_vulkan/blit_image.h
+++ b/src/video_core/renderer_vulkan/blit_image.h
@@ -10,6 +10,8 @@
namespace Vulkan {
+using VideoCommon::Extent3D;
+using VideoCommon::Offset2D;
using VideoCommon::Region2D;
class Device;
@@ -36,6 +38,10 @@ public:
Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation);
+ void BlitColor(const Framebuffer* dst_framebuffer, VkImageView src_image_view,
+ VkImage src_image, VkSampler src_sampler, const Region2D& dst_region,
+ const Region2D& src_region, const Extent3D& src_size);
+
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
VkImageView src_stencil_view, const Region2D& dst_region,
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index b75b8eec6..86ef0daeb 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -266,6 +266,35 @@ void RasterizerVulkan::DrawIndirect() {
buffer_cache.SetDrawIndirect(nullptr);
}
+void RasterizerVulkan::DrawTexture() { // Draws the guest's current draw-texture state through blit_image.BlitColor.
+ MICROPROFILE_SCOPE(Vulkan_Drawing);
+ // SCOPE_EXIT defers gpu.TickWork() (presumably until this function's scope ends).
+ SCOPE_EXIT({ gpu.TickWork(); });
+ FlushWork();
+
+ query_cache.UpdateCounters();
+ // Synchronize texture descriptors and render targets before reading the draw state.
+ texture_cache.SynchronizeGraphicsDescriptors();
+ texture_cache.UpdateRenderTargets(false);
+
+ UpdateDynamicStates();
+ // The draw state stores sampler/texture indices; resolve them through the texture cache.
+ const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
+ const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
+ const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
+ Region2D dst_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x0), // Coordinates are cast to s32 to build the regions.
+ .y = static_cast<s32>(draw_texture_state.dst_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.dst_x1),
+ .y = static_cast<s32>(draw_texture_state.dst_y1)}};
+ Region2D src_region = {Offset2D{.x = static_cast<s32>(draw_texture_state.src_x0),
+ .y = static_cast<s32>(draw_texture_state.src_y0)},
+ Offset2D{.x = static_cast<s32>(draw_texture_state.src_x1),
+ .y = static_cast<s32>(draw_texture_state.src_y1)}};
+ blit_image.BlitColor(texture_cache.GetFramebuffer(), texture.RenderTarget(),
+ texture.ImageHandle(), sampler->Handle(), dst_region, src_region,
+ texture.size); // texture.size is passed along with the unnormalized source region.
+}
+
void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 472cc64d9..a0508b57c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -66,6 +66,7 @@ public:
void Draw(bool is_indexed, u32 instance_count) override;
void DrawIndirect() override;
+ void DrawTexture() override;
void Clear(u32 layer_count) override;
void DispatchCompute() override;
void ResetCounter(VideoCore::QueryType type) override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 87152c8e9..1b01990a4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -149,6 +149,13 @@ typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
}
template <class P>
+typename P::ImageView& TextureCache<P>::GetImageView(u32 index) noexcept { // Looks up an image view by index rather than by ImageViewId.
+ const auto image_view_id = VisitImageView(channel_state->graphics_image_table,
+ channel_state->graphics_image_view_ids, index); // Uses the graphics (not compute) image table of the current channel state.
+ return slot_image_views[image_view_id];
+}
+
+template <class P>
void TextureCache<P>::MarkModification(ImageId id) noexcept {
MarkModification(slot_images[id]);
}
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 4eea1f609..485eaabaa 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -129,6 +129,9 @@ public:
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
+ /// Get the image view at the specified index of the graphics descriptor table
+ [[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
+
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index aa02cc63c..bb9910a53 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -366,6 +366,11 @@
</item>
<item>
<property name="text">
+ <string>1.5X (1080p/1620p) [EXPERIMENTAL]</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
<string>2X (1440p/2160p)</string>
</property>
</item>
@@ -389,6 +394,16 @@
<string>6X (4320p/6480p)</string>
</property>
</item>
+ <item>
+ <property name="text">
+ <string>7X (5040p/7560p)</string>
+ </property>
+ </item>
+ <item>
+ <property name="text">
+ <string>8X (5760p/8640p)</string>
+ </property>
+ </item>
</widget>
</item>
</layout>