summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/audio_core/CMakeLists.txt2
-rw-r--r--src/audio_core/hle/dsp.cpp16
-rw-r--r--src/audio_core/time_stretch.cpp144
-rw-r--r--src/audio_core/time_stretch.h57
-rw-r--r--src/core/hle/kernel/memory.cpp3
-rw-r--r--src/core/hle/service/dsp_dsp.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp26
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h11
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_state.h2
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h20
-rw-r--r--src/video_core/shader/shader.cpp9
-rw-r--r--src/video_core/shader/shader.h25
-rw-r--r--src/video_core/shader/shader_interpreter.cpp8
-rw-r--r--src/video_core/shader/shader_interpreter.h2
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp32
-rw-r--r--src/video_core/shader/shader_jit_x64.h6
17 files changed, 329 insertions, 45 deletions
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 13b5e400e..eba0a5697 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -7,6 +7,7 @@ set(SRCS
hle/source.cpp
interpolate.cpp
sink_details.cpp
+ time_stretch.cpp
)
set(HEADERS
@@ -21,6 +22,7 @@ set(HEADERS
null_sink.h
sink.h
sink_details.h
+ time_stretch.h
)
include_directories(../../externals/soundtouch/include)
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index 0cdbdb06a..5113ad8ca 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -9,6 +9,7 @@
#include "audio_core/hle/pipe.h"
#include "audio_core/hle/source.h"
#include "audio_core/sink.h"
+#include "audio_core/time_stretch.h"
namespace DSP {
namespace HLE {
@@ -48,15 +49,29 @@ static std::array<Source, num_sources> sources = {
};
static std::unique_ptr<AudioCore::Sink> sink;
+static AudioCore::TimeStretcher time_stretcher;
void Init() {
DSP::HLE::ResetPipes();
+
for (auto& source : sources) {
source.Reset();
}
+
+ time_stretcher.Reset();
+ if (sink) {
+ time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
+ }
}
void Shutdown() {
+ time_stretcher.Flush();
+ while (true) {
+ std::vector<s16> residual_audio = time_stretcher.Process(sink->SamplesInQueue());
+ if (residual_audio.empty())
+ break;
+ sink->EnqueueSamples(residual_audio);
+ }
}
bool Tick() {
@@ -77,6 +92,7 @@ bool Tick() {
void SetSink(std::unique_ptr<AudioCore::Sink> sink_) {
sink = std::move(sink_);
+ time_stretcher.SetOutputSampleRate(sink->GetNativeSampleRate());
}
} // namespace HLE
diff --git a/src/audio_core/time_stretch.cpp b/src/audio_core/time_stretch.cpp
new file mode 100644
index 000000000..ea38f40d0
--- /dev/null
+++ b/src/audio_core/time_stretch.cpp
@@ -0,0 +1,144 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <chrono>
+#include <cmath>
+#include <vector>
+
+#include <SoundTouch.h>
+
+#include "audio_core/audio_core.h"
+#include "audio_core/time_stretch.h"
+
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "common/math_util.h"
+
+using steady_clock = std::chrono::steady_clock;
+
+namespace AudioCore {
+
+constexpr double MIN_RATIO = 0.1;
+constexpr double MAX_RATIO = 100.0;
+
+static double ClampRatio(double ratio) {
+ return MathUtil::Clamp(ratio, MIN_RATIO, MAX_RATIO);
+}
+
+constexpr double MIN_DELAY_TIME = 0.05; // Units: seconds
+constexpr double MAX_DELAY_TIME = 0.25; // Units: seconds
+constexpr size_t DROP_FRAMES_SAMPLE_DELAY = 16000; // Units: samples
+
+constexpr double SMOOTHING_FACTOR = 0.007;
+
+struct TimeStretcher::Impl {
+ soundtouch::SoundTouch soundtouch;
+
+ steady_clock::time_point frame_timer = steady_clock::now();
+ size_t samples_queued = 0;
+
+ double smoothed_ratio = 1.0;
+
+ double sample_rate = static_cast<double>(native_sample_rate);
+};
+
+std::vector<s16> TimeStretcher::Process(size_t samples_in_queue) {
+ // This is a very simple algorithm without any fancy control theory. It works and is stable.
+
+ double ratio = CalculateCurrentRatio();
+ ratio = CorrectForUnderAndOverflow(ratio, samples_in_queue);
+ impl->smoothed_ratio = (1.0 - SMOOTHING_FACTOR) * impl->smoothed_ratio + SMOOTHING_FACTOR * ratio;
+ impl->smoothed_ratio = ClampRatio(impl->smoothed_ratio);
+
+ // SoundTouch's tempo definition the inverse of our ratio definition.
+ impl->soundtouch.setTempo(1.0 / impl->smoothed_ratio);
+
+ std::vector<s16> samples = GetSamples();
+ if (samples_in_queue >= DROP_FRAMES_SAMPLE_DELAY) {
+ samples.clear();
+ LOG_DEBUG(Audio, "Dropping frames!");
+ }
+ return samples;
+}
+
+TimeStretcher::TimeStretcher() : impl(std::make_unique<Impl>()) {
+ impl->soundtouch.setPitch(1.0);
+ impl->soundtouch.setChannels(2);
+ impl->soundtouch.setSampleRate(native_sample_rate);
+ Reset();
+}
+
+TimeStretcher::~TimeStretcher() {
+ impl->soundtouch.clear();
+}
+
+void TimeStretcher::SetOutputSampleRate(unsigned int sample_rate) {
+ impl->sample_rate = static_cast<double>(sample_rate);
+ impl->soundtouch.setRate(static_cast<double>(native_sample_rate) / impl->sample_rate);
+}
+
+void TimeStretcher::AddSamples(const s16* buffer, size_t num_samples) {
+ impl->soundtouch.putSamples(buffer, static_cast<uint>(num_samples));
+ impl->samples_queued += num_samples;
+}
+
+void TimeStretcher::Flush() {
+ impl->soundtouch.flush();
+}
+
+void TimeStretcher::Reset() {
+ impl->soundtouch.setTempo(1.0);
+ impl->soundtouch.clear();
+ impl->smoothed_ratio = 1.0;
+ impl->frame_timer = steady_clock::now();
+ impl->samples_queued = 0;
+ SetOutputSampleRate(native_sample_rate);
+}
+
+double TimeStretcher::CalculateCurrentRatio() {
+ const steady_clock::time_point now = steady_clock::now();
+ const std::chrono::duration<double> duration = now - impl->frame_timer;
+
+ const double expected_time = static_cast<double>(impl->samples_queued) / static_cast<double>(native_sample_rate);
+ const double actual_time = duration.count();
+
+ double ratio;
+ if (expected_time != 0) {
+ ratio = ClampRatio(actual_time / expected_time);
+ } else {
+ ratio = impl->smoothed_ratio;
+ }
+
+ impl->frame_timer = now;
+ impl->samples_queued = 0;
+
+ return ratio;
+}
+
+double TimeStretcher::CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const {
+ const size_t min_sample_delay = static_cast<size_t>(MIN_DELAY_TIME * impl->sample_rate);
+ const size_t max_sample_delay = static_cast<size_t>(MAX_DELAY_TIME * impl->sample_rate);
+
+ if (sample_delay < min_sample_delay) {
+ // Make the ratio bigger.
+ ratio = ratio > 1.0 ? ratio * ratio : sqrt(ratio);
+ } else if (sample_delay > max_sample_delay) {
+ // Make the ratio smaller.
+ ratio = ratio > 1.0 ? sqrt(ratio) : ratio * ratio;
+ }
+
+ return ClampRatio(ratio);
+}
+
+std::vector<s16> TimeStretcher::GetSamples() {
+ uint available = impl->soundtouch.numSamples();
+
+ std::vector<s16> output(static_cast<size_t>(available) * 2);
+
+ impl->soundtouch.receiveSamples(output.data(), available);
+
+ return output;
+}
+
+} // namespace AudioCore
diff --git a/src/audio_core/time_stretch.h b/src/audio_core/time_stretch.h
new file mode 100644
index 000000000..1fde3f72a
--- /dev/null
+++ b/src/audio_core/time_stretch.h
@@ -0,0 +1,57 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace AudioCore {
+
+class TimeStretcher final {
+public:
+ TimeStretcher();
+ ~TimeStretcher();
+
+ /**
+ * Set sample rate for the samples that Process returns.
+ * @param sample_rate The sample rate.
+ */
+ void SetOutputSampleRate(unsigned int sample_rate);
+
+ /**
+ * Add samples to be processed.
+ * @param sample_buffer Buffer of samples in interleaved stereo PCM16 format.
+ * @param num_sample Number of samples.
+ */
+ void AddSamples(const s16* sample_buffer, size_t num_samples);
+
+ /// Flush audio remaining in internal buffers.
+ void Flush();
+
+ /// Resets internal state and clears buffers.
+ void Reset();
+
+ /**
+ * Does audio stretching and produces the time-stretched samples.
+ * Timer calculations use sample_delay to determine how much of a margin we have.
+ * @param sample_delay How many samples are buffered downstream of this module and haven't been played yet.
+ * @return Samples to play in interleaved stereo PCM16 format.
+ */
+ std::vector<s16> Process(size_t sample_delay);
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> impl;
+
+ /// INTERNAL: ratio = wallclock time / emulated time
+ double CalculateCurrentRatio();
+ /// INTERNAL: If we have too many or too few samples downstream, nudge ratio in the appropriate direction.
+ double CorrectForUnderAndOverflow(double ratio, size_t sample_delay) const;
+ /// INTERNAL: Gets the time-stretched samples from SoundTouch.
+ std::vector<s16> GetSamples();
+};
+
+} // namespace AudioCore
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 4be20db22..17ae87aef 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -55,6 +55,9 @@ void MemoryInit(u32 mem_type) {
memory_regions[i].size = memory_region_sizes[mem_type][i];
memory_regions[i].used = 0;
memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
+ // Reserve enough space for this region of FCRAM.
+ // We do not want this block of memory to be relocated when allocating from it.
+ memory_regions[i].linear_heap_memory->reserve(memory_regions[i].size);
base += memory_regions[i].size;
}
diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp
index 274fc751a..10730d7ac 100644
--- a/src/core/hle/service/dsp_dsp.cpp
+++ b/src/core/hle/service/dsp_dsp.cpp
@@ -440,9 +440,9 @@ static void GetHeadphoneStatus(Service::Interface* self) {
cmd_buff[0] = IPC::MakeHeader(0x1F, 2, 0);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
- cmd_buff[2] = 0; // Not using headphones?
+ cmd_buff[2] = 0; // Not using headphones
- LOG_WARNING(Service_DSP, "(STUBBED) called");
+ LOG_DEBUG(Service_DSP, "called");
}
/**
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ed2e2f3ae..bcd1ae78d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -104,7 +104,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
// Sync fixed function OpenGL state
SyncCullMode();
- SyncDepthModifiers();
SyncBlendEnabled();
SyncBlendFuncs();
SyncBlendColor();
@@ -259,8 +258,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Depth modifiers
case PICA_REG_INDEX(viewport_depth_range):
+ SyncDepthScale();
+ break;
case PICA_REG_INDEX(viewport_depth_near_plane):
- SyncDepthModifiers();
+ SyncDepthOffset();
break;
// Depth buffering
@@ -880,6 +881,8 @@ void RasterizerOpenGL::SetShader() {
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
// Update uniforms
+ SyncDepthScale();
+ SyncDepthOffset();
SyncAlphaTest();
SyncCombinerColor();
auto& tev_stages = Pica::g_state.regs.GetTevStages();
@@ -922,13 +925,20 @@ void RasterizerOpenGL::SyncCullMode() {
}
}
-void RasterizerOpenGL::SyncDepthModifiers() {
+void RasterizerOpenGL::SyncDepthScale() {
float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
- float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
+ if (depth_scale != uniform_block_data.data.depth_scale) {
+ uniform_block_data.data.depth_scale = depth_scale;
+ uniform_block_data.dirty = true;
+ }
+}
- uniform_block_data.data.depth_scale = depth_scale;
- uniform_block_data.data.depth_offset = depth_offset;
- uniform_block_data.dirty = true;
+void RasterizerOpenGL::SyncDepthOffset() {
+ float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
+ if (depth_offset != uniform_block_data.data.depth_offset) {
+ uniform_block_data.data.depth_offset = depth_offset;
+ uniform_block_data.dirty = true;
+ }
}
void RasterizerOpenGL::SyncBlendEnabled() {
@@ -937,6 +947,8 @@ void RasterizerOpenGL::SyncBlendEnabled() {
void RasterizerOpenGL::SyncBlendFuncs() {
const auto& regs = Pica::g_state.regs;
+ state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
+ state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index eed00011a..d70369400 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -339,8 +339,11 @@ private:
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
- /// Syncs the depth scale and offset to match the PICA registers
- void SyncDepthModifiers();
+ /// Syncs the depth scale to match the PICA register
+ void SyncDepthScale();
+
+ /// Syncs the depth offset to match the PICA register
+ void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
@@ -413,7 +416,7 @@ private:
UniformData data;
bool lut_dirty[6];
bool dirty;
- } uniform_block_data;
+ } uniform_block_data = {};
std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
@@ -422,5 +425,5 @@ private:
OGLFramebuffer framebuffer;
std::array<OGLTexture, 6> lighting_luts;
- std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
+ std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
};
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 02cd9f417..fa141fc9a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -36,6 +36,8 @@ OpenGLState::OpenGLState() {
stencil.action_stencil_fail = GL_KEEP;
blend.enabled = false;
+ blend.rgb_equation = GL_FUNC_ADD;
+ blend.a_equation = GL_FUNC_ADD;
blend.src_rgb_func = GL_ONE;
blend.dst_rgb_func = GL_ZERO;
blend.src_a_func = GL_ONE;
@@ -165,6 +167,11 @@ void OpenGLState::Apply() const {
blend.src_a_func, blend.dst_a_func);
}
+ if (blend.rgb_equation != cur_state.blend.rgb_equation ||
+ blend.a_equation != cur_state.blend.a_equation) {
+ glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
+ }
+
if (logic_op != cur_state.logic_op) {
glLogicOp(logic_op);
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24f20e47c..228727054 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -40,6 +40,8 @@ public:
struct {
bool enabled; // GL_BLEND
+ GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
+ GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func; // GL_BLEND_DST_RGB
GLenum src_a_func; // GL_BLEND_SRC_ALPHA
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 976d1f364..6dc2758c5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -78,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
return gl_mode;
}
+inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) {
+ static const GLenum blend_equation_table[] = {
+ GL_FUNC_ADD, // BlendEquation::Add
+ GL_FUNC_SUBTRACT, // BlendEquation::Subtract
+ GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
+ GL_MIN, // BlendEquation::Min
+ GL_MAX, // BlendEquation::Max
+ };
+
+ // Range check table for input
+ if (static_cast<size_t>(equation) >= ARRAY_SIZE(blend_equation_table)) {
+ LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation);
+ UNREACHABLE();
+
+ return GL_FUNC_ADD;
+ }
+
+ return blend_equation_table[(unsigned)equation];
+}
+
inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
static const GLenum blend_func_table[] = {
GL_ZERO, // BlendFactor::Zero
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index e93a9d92a..161097610 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
+ auto& setup = g_state.vs;
MICROPROFILE_SCOPE(GPU_Shader);
@@ -81,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled)
- jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
+ jit_shader->Run(setup, state, config.main_offset);
else
- RunInterpreter(state);
+ RunInterpreter(setup, state, config.main_offset);
#else
- RunInterpreter(state);
+ RunInterpreter(setup, state, config.main_offset);
#endif // ARCHITECTURE_x86_64
// Setup output data
@@ -156,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
state.conditional_code[0] = false;
state.conditional_code[1] = false;
- RunInterpreter(state);
+ RunInterpreter(setup, state, config.main_offset);
return state.debug;
}
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 983e4a967..84898f21c 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -283,10 +283,10 @@ struct UnitState {
static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Input:
- return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
- return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@@ -297,10 +297,10 @@ struct UnitState {
static size_t OutputOffset(const DestRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Output:
- return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
- return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@@ -323,6 +323,23 @@ struct ShaderSetup {
std::array<Math::Vec4<u8>, 4> i;
} uniforms;
+ static size_t UniformOffset(RegisterType type, unsigned index) {
+ switch (type) {
+ case RegisterType::FloatUniform:
+ return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
+
+ case RegisterType::BoolUniform:
+ return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
+
+ case RegisterType::IntUniform:
+ return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
+
+ default:
+ UNREACHABLE();
+ return 0;
+ }
+ }
+
std::array<u32, 1024> program_code;
std::array<u32, 1024> swizzle_data;
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 3a827d11f..714e8bfd5 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -41,11 +41,11 @@ struct CallStackElement {
};
template<bool Debug>
-void RunInterpreter(UnitState<Debug>& state) {
+void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
// TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack;
- u32 program_counter = g_state.regs.vs.main_offset;
+ u32 program_counter = offset;
const auto& uniforms = g_state.vs.uniforms;
const auto& swizzle_data = g_state.vs.swizzle_data;
@@ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) {
}
// Explicit instantiation
-template void RunInterpreter(UnitState<false>& state);
-template void RunInterpreter(UnitState<true>& state);
+template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
+template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
} // namespace
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 6048cdf3a..bb3ce1c6e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -11,7 +11,7 @@ namespace Shader {
template <bool Debug> struct UnitState;
template<bool Debug>
-void RunInterpreter(UnitState<Debug>& state);
+void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 99f6c51eb..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
// purposes, as documented below:
/// Pointer to the uniform memory
-static const X64Reg UNIFORMS = R9;
+static const X64Reg SETUP = R9;
/// The two 32-bit VS address offset registers set by the MOVA instruction
static const X64Reg ADDROFFS_REG_0 = R10;
static const X64Reg ADDROFFS_REG_1 = R11;
@@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
/// Result of the previous CMP instruction for the Y-component comparison
static const X64Reg COND1 = R14;
/// Pointer to the UnitState instance for the current VS unit
-static const X64Reg REGISTERS = R15;
+static const X64Reg STATE = R15;
/// SIMD scratch register
static const X64Reg SCRATCH = XMM0;
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
// State registers that must not be modified by external functions calls
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = {
- UNIFORMS, REGISTERS, // Pointers to register blocks
+ SETUP, STATE, // Pointers to register blocks
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
ONE+16, NEGBIT+16, // Constants
};
@@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
size_t src_offset;
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
- src_ptr = UNIFORMS;
- src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
+ src_ptr = SETUP;
+ src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
} else {
- src_ptr = REGISTERS;
+ src_ptr = STATE;
src_offset = UnitState<false>::InputOffset(src_reg);
}
@@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
// If all components are enabled, write the result to the destination register
if (swiz.dest_mask == NO_DEST_REG_MASK) {
// Store dest back to memory
- MOVAPS(MDisp(REGISTERS, dest_offset_disp), src);
+ MOVAPS(MDisp(STATE, dest_offset_disp), src);
} else {
// Not all components are enabled, so mask the result when storing to the destination register...
- MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp));
+ MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
if (Common::GetCPUCaps().sse4_1) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
}
// Store dest back to memory
- MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH);
+ MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
}
}
@@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
}
void JitShader::Compile_UniformCondition(Instruction instr) {
- int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
- CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
+ int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
+ CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
}
BitSet32 JitShader::PersistentCallerSavedRegs() {
@@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
looping = true;
- int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>));
- MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset));
+ int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
+ MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -826,8 +826,8 @@ void JitShader::Compile() {
// The stack pointer is 8 modulo 16 at the entry of a procedure
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
- MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
- MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
+ MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
+ MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
// Zero address/loop registers
XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -845,7 +845,7 @@ void JitShader::Compile() {
MOVAPS(NEGBIT, MatR(RAX));
// Jump to start of the shader program
- JMPptr(R(ABI_PARAM2));
+ JMPptr(R(ABI_PARAM3));
// Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 30aa7ff30..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
public:
JitShader();
- void Run(void* registers, unsigned offset) const {
- program(registers, code_ptr[offset]);
+ void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
+ program(&setup, &state, code_ptr[offset]);
}
void Compile();
@@ -117,7 +117,7 @@ private:
/// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
- using CompiledShader = void(void* registers, const u8* start_addr);
+ using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr;
};