summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/hle/source.cpp49
-rw-r--r--src/audio_core/interpolate.cpp86
-rw-r--r--src/audio_core/interpolate.h27
-rw-r--r--src/citra/citra.rc8
-rw-r--r--src/citra/config.cpp2
-rw-r--r--src/citra/default_ini.h4
-rw-r--r--src/citra_qt/citra-qt.rc12
-rw-r--r--src/citra_qt/configuration/config.cpp3
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp4
-rw-r--r--src/core/core.cpp1
-rw-r--r--src/core/frontend/emu_window.cpp71
-rw-r--r--src/core/frontend/emu_window.h31
-rw-r--r--src/core/frontend/input.h6
-rw-r--r--src/core/hle/applets/mii_selector.cpp6
-rw-r--r--src/core/hle/applets/mii_selector.h57
-rw-r--r--src/core/hle/kernel/memory.cpp30
-rw-r--r--src/core/hle/kernel/memory.h2
-rw-r--r--src/core/hle/kernel/thread.cpp12
-rw-r--r--src/core/hle/kernel/vm_manager.cpp13
-rw-r--r--src/core/hle/kernel/vm_manager.h6
-rw-r--r--src/core/hle/lock.cpp2
-rw-r--r--src/core/hle/lock.h2
-rw-r--r--src/core/hle/service/apt/apt.cpp286
-rw-r--r--src/core/hle/service/cfg/cfg.cpp2
-rw-r--r--src/core/hle/service/cfg/cfg.h2
-rw-r--r--src/core/hle/service/hid/hid.cpp12
-rw-r--r--src/core/hle/service/nwm/nwm_uds.cpp165
-rw-r--r--src/core/hle/service/nwm/nwm_uds.h12
-rw-r--r--src/core/hle/service/nwm/uds_beacon.cpp3
-rw-r--r--src/core/hle/service/nwm/uds_beacon.h30
-rw-r--r--src/core/hle/service/nwm/uds_connection.cpp79
-rw-r--r--src/core/hle/service/nwm/uds_connection.h51
-rw-r--r--src/core/hle/svc.cpp2
-rw-r--r--src/core/loader/3dsx.cpp1
-rw-r--r--src/core/loader/elf.cpp1
-rw-r--r--src/core/loader/ncch.cpp1
-rw-r--r--src/core/memory.cpp157
-rw-r--r--src/core/memory.h62
-rw-r--r--src/core/memory_setup.h10
-rw-r--r--src/core/settings.h1
-rw-r--r--src/tests/core/arm/arm_test_common.cpp18
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/command_processor.cpp54
-rw-r--r--src/video_core/geometry_pipeline.cpp274
-rw-r--r--src/video_core/geometry_pipeline.h49
-rw-r--r--src/video_core/pica.cpp21
-rw-r--r--src/video_core/pica_state.h11
-rw-r--r--src/video_core/primitive_assembly.cpp15
-rw-r--r--src/video_core/primitive_assembly.h7
-rw-r--r--src/video_core/regs_pipeline.h34
-rw-r--r--src/video_core/regs_rasterizer.h14
-rw-r--r--src/video_core/regs_shader.h7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp28
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h9
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp99
-rw-r--r--src/video_core/shader/shader.cpp41
-rw-r--r--src/video_core/shader/shader.h49
-rw-r--r--src/video_core/shader/shader_interpreter.cpp16
-rw-r--r--src/video_core/shader/shader_jit_x64_compiler.cpp49
-rw-r--r--src/video_core/shader/shader_jit_x64_compiler.h2
-rw-r--r--src/video_core/swrasterizer/clipper.cpp31
-rw-r--r--src/video_core/swrasterizer/lighting.cpp46
-rw-r--r--src/video_core/swrasterizer/lighting.h3
-rw-r--r--src/video_core/swrasterizer/rasterizer.cpp4
65 files changed, 1614 insertions, 582 deletions
diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp
index 92484c526..de4e88cae 100644
--- a/src/audio_core/hle/source.cpp
+++ b/src/audio_core/hle/source.cpp
@@ -244,17 +244,27 @@ void Source::GenerateFrame() {
break;
}
- const size_t size_to_copy =
- std::min(state.current_buffer.size(), current_frame.size() - frame_position);
-
- std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy,
- current_frame.begin() + frame_position);
- state.current_buffer.erase(state.current_buffer.begin(),
- state.current_buffer.begin() + size_to_copy);
-
- frame_position += size_to_copy;
- state.next_sample_number += static_cast<u32>(size_to_copy);
+ switch (state.interpolation_mode) {
+ case InterpolationMode::None:
+ AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier,
+ current_frame, frame_position);
+ break;
+ case InterpolationMode::Linear:
+ AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
+ current_frame, frame_position);
+ break;
+ case InterpolationMode::Polyphase:
+ // TODO(merry): Implement polyphase interpolation
+ LOG_DEBUG(Audio_DSP, "Polyphase interpolation unimplemented; falling back to linear");
+ AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier,
+ current_frame, frame_position);
+ break;
+ default:
+ UNIMPLEMENTED();
+ break;
+ }
}
+ state.next_sample_number += frame_position;
state.filters.ProcessFrame(current_frame);
}
@@ -305,25 +315,6 @@ bool Source::DequeueBuffer() {
return true;
}
- switch (state.interpolation_mode) {
- case InterpolationMode::None:
- state.current_buffer =
- AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier);
- break;
- case InterpolationMode::Linear:
- state.current_buffer =
- AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
- break;
- case InterpolationMode::Polyphase:
- // TODO(merry): Implement polyphase interpolation
- state.current_buffer =
- AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier);
- break;
- default:
- UNIMPLEMENTED();
- break;
- }
-
// the first playthrough starts at play_position, loops start at the beginning of the buffer
state.current_sample_number = (!buf.has_played) ? buf.play_position : 0;
state.next_sample_number = state.current_sample_number;
diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp
index 8a5d4181a..16e68bc5c 100644
--- a/src/audio_core/interpolate.cpp
+++ b/src/audio_core/interpolate.cpp
@@ -13,74 +13,64 @@ namespace AudioInterp {
constexpr u64 scale_factor = 1 << 24;
constexpr u64 scale_mask = scale_factor - 1;
-/// Here we step over the input in steps of rate_multiplier, until we consume all of the input.
+/// Here we step over the input in steps of rate, until we consume all of the input.
/// Three adjacent samples are passed to fn each step.
template <typename Function>
-static StereoBuffer16 StepOverSamples(State& state, const StereoBuffer16& input,
- float rate_multiplier, Function fn) {
- ASSERT(rate_multiplier > 0);
+static void StepOverSamples(State& state, StereoBuffer16& input, float rate,
+ DSP::HLE::StereoFrame16& output, size_t& outputi, Function fn) {
+ ASSERT(rate > 0);
- if (input.size() < 2)
- return {};
+ if (input.empty())
+ return;
- StereoBuffer16 output;
- output.reserve(static_cast<size_t>(input.size() / rate_multiplier));
+ input.insert(input.begin(), {state.xn2, state.xn1});
- u64 step_size = static_cast<u64>(rate_multiplier * scale_factor);
+ const u64 step_size = static_cast<u64>(rate * scale_factor);
+ u64 fposition = state.fposition;
+ size_t inputi = 0;
- u64 fposition = 0;
- const u64 max_fposition = input.size() * scale_factor;
+ while (outputi < output.size()) {
+ inputi = static_cast<size_t>(fposition / scale_factor);
- while (fposition < 1 * scale_factor) {
- u64 fraction = fposition & scale_mask;
-
- output.push_back(fn(fraction, state.xn2, state.xn1, input[0]));
-
- fposition += step_size;
- }
-
- while (fposition < 2 * scale_factor) {
- u64 fraction = fposition & scale_mask;
-
- output.push_back(fn(fraction, state.xn1, input[0], input[1]));
-
- fposition += step_size;
- }
+ if (inputi + 2 >= input.size()) {
+ inputi = input.size() - 2;
+ break;
+ }
- while (fposition < max_fposition) {
u64 fraction = fposition & scale_mask;
-
- size_t index = static_cast<size_t>(fposition / scale_factor);
- output.push_back(fn(fraction, input[index - 2], input[index - 1], input[index]));
+ output[outputi++] = fn(fraction, input[inputi], input[inputi + 1], input[inputi + 2]);
fposition += step_size;
}
- state.xn2 = input[input.size() - 2];
- state.xn1 = input[input.size() - 1];
+ state.xn2 = input[inputi];
+ state.xn1 = input[inputi + 1];
+ state.fposition = fposition - inputi * scale_factor;
- return output;
+ input.erase(input.begin(), input.begin() + inputi + 2);
}
-StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier) {
- return StepOverSamples(
- state, input, rate_multiplier,
+void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+ size_t& outputi) {
+ StepOverSamples(
+ state, input, rate, output, outputi,
[](u64 fraction, const auto& x0, const auto& x1, const auto& x2) { return x0; });
}
-StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier) {
+void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+ size_t& outputi) {
// Note on accuracy: Some values that this produces are +/- 1 from the actual firmware.
- return StepOverSamples(state, input, rate_multiplier,
- [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
- // This is a saturated subtraction. (Verified by black-box fuzzing.)
- s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
- s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
-
- return std::array<s16, 2>{
- static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
- static_cast<s16>(x0[1] + fraction * delta1 / scale_factor),
- };
- });
+ StepOverSamples(state, input, rate, output, outputi,
+ [](u64 fraction, const auto& x0, const auto& x1, const auto& x2) {
+ // This is a saturated subtraction. (Verified by black-box fuzzing.)
+ s64 delta0 = MathUtil::Clamp<s64>(x1[0] - x0[0], -32768, 32767);
+ s64 delta1 = MathUtil::Clamp<s64>(x1[1] - x0[1], -32768, 32767);
+
+ return std::array<s16, 2>{
+ static_cast<s16>(x0[0] + fraction * delta0 / scale_factor),
+ static_cast<s16>(x0[1] + fraction * delta1 / scale_factor),
+ };
+ });
}
} // namespace AudioInterp
diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h
index 19a7b66cb..59f59bc14 100644
--- a/src/audio_core/interpolate.h
+++ b/src/audio_core/interpolate.h
@@ -6,6 +6,7 @@
#include <array>
#include <vector>
+#include "audio_core/hle/common.h"
#include "common/common_types.h"
namespace AudioInterp {
@@ -14,31 +15,35 @@ namespace AudioInterp {
using StereoBuffer16 = std::vector<std::array<s16, 2>>;
struct State {
- // Two historical samples.
+ /// Two historical samples.
std::array<s16, 2> xn1 = {}; ///< x[n-1]
std::array<s16, 2> xn2 = {}; ///< x[n-2]
+ /// Current fractional position.
+ u64 fposition = 0;
};
/**
* No interpolation. This is equivalent to a zero-order hold. There is a two-sample predelay.
* @param state Interpolation state.
* @param input Input buffer.
- * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
- * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0
- * performs upsampling.
- * @return The resampled audio buffer.
+ * @param rate Stretch factor. Must be a positive non-zero value.
+ * rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
+ * @param output The resampled audio buffer.
+ * @param outputi The index of output to start writing to.
*/
-StereoBuffer16 None(State& state, const StereoBuffer16& input, float rate_multiplier);
+void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+ size_t& outputi);
/**
* Linear interpolation. This is equivalent to a first-order hold. There is a two-sample predelay.
* @param state Interpolation state.
* @param input Input buffer.
- * @param rate_multiplier Stretch factor. Must be a positive non-zero value.
- * rate_multiplier > 1.0 performs decimation and rate_multipler < 1.0
- * performs upsampling.
- * @return The resampled audio buffer.
+ * @param rate Stretch factor. Must be a positive non-zero value.
+ * rate > 1.0 performs decimation and rate < 1.0 performs upsampling.
+ * @param output The resampled audio buffer.
+ * @param outputi The index of output to start writing to.
*/
-StereoBuffer16 Linear(State& state, const StereoBuffer16& input, float rate_multiplier);
+void Linear(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output,
+ size_t& outputi);
} // namespace AudioInterp
diff --git a/src/citra/citra.rc b/src/citra/citra.rc
index fea603004..c490ef302 100644
--- a/src/citra/citra.rc
+++ b/src/citra/citra.rc
@@ -1,3 +1,4 @@
+#include "winresrc.h"
/////////////////////////////////////////////////////////////////////////////
//
// Icon
@@ -7,3 +8,10 @@
// remains consistent on all systems.
CITRA_ICON ICON "../../dist/citra.ico"
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// RT_MANIFEST
+//
+
+1 RT_MANIFEST "../../dist/citra.manifest"
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index 3869b6b5d..a48ef08c7 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -78,6 +78,8 @@ void Config::ReadValues() {
Settings::values.motion_device = sdl2_config->Get(
"Controls", "motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01");
+ Settings::values.touch_device =
+ sdl2_config->Get("Controls", "touch_device", "engine:emu_window");
// Core
Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index ea02a788d..4b13a2e1b 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -62,6 +62,10 @@ c_stick=
# - "sensitivity": the coefficient converting mouse movement to tilting angle (default to 0.01)
motion_device=
+# for touch input, the following devices are available:
+# - "emu_window" (default) for emulating touch input from mouse input to the emulation window. No parameters required
+touch_device=
+
[Core]
# Whether to use the Just-In-Time (JIT) compiler for CPU emulation
# 0: Interpreter (slow), 1 (default): JIT (fast)
diff --git a/src/citra_qt/citra-qt.rc b/src/citra_qt/citra-qt.rc
index fea603004..a48a9440d 100644
--- a/src/citra_qt/citra-qt.rc
+++ b/src/citra_qt/citra-qt.rc
@@ -1,3 +1,4 @@
+#include "winresrc.h"
/////////////////////////////////////////////////////////////////////////////
//
// Icon
@@ -5,5 +6,14 @@
// Icon with lowest ID value placed first to ensure application icon
// remains consistent on all systems.
-CITRA_ICON ICON "../../dist/citra.ico"
+// QT requires that the default application icon is named IDI_ICON1
+IDI_ICON1 ICON "../../dist/citra.ico"
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// RT_MANIFEST
+//
+
+1 RT_MANIFEST "../../dist/citra.manifest"
diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp
index e2dceaa4c..ef114aad3 100644
--- a/src/citra_qt/configuration/config.cpp
+++ b/src/citra_qt/configuration/config.cpp
@@ -61,6 +61,8 @@ void Config::ReadValues() {
qt_config->value("motion_device", "engine:motion_emu,update_period:100,sensitivity:0.01")
.toString()
.toStdString();
+ Settings::values.touch_device =
+ qt_config->value("touch_device", "engine:emu_window").toString().toStdString();
qt_config->endGroup();
@@ -213,6 +215,7 @@ void Config::SaveValues() {
QString::fromStdString(Settings::values.analogs[i]));
}
qt_config->setValue("motion_device", QString::fromStdString(Settings::values.motion_device));
+ qt_config->setValue("touch_device", QString::fromStdString(Settings::values.touch_device));
qt_config->endGroup();
qt_config->beginGroup("Core");
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 89578024f..cd1a8de2d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -146,6 +146,7 @@ set(SRCS
hle/service/nwm/nwm_tst.cpp
hle/service/nwm/nwm_uds.cpp
hle/service/nwm/uds_beacon.cpp
+ hle/service/nwm/uds_connection.cpp
hle/service/nwm/uds_data.cpp
hle/service/pm_app.cpp
hle/service/ptm/ptm.cpp
@@ -346,6 +347,7 @@ set(HEADERS
hle/service/nwm/nwm_tst.h
hle/service/nwm/nwm_uds.h
hle/service/nwm/uds_beacon.h
+ hle/service/nwm/uds_connection.h
hle/service/nwm/uds_data.h
hle/service/pm_app.h
hle/service/ptm/ptm.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 0a0b91590..34c5aa381 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -56,7 +56,9 @@ static Dynarmic::UserCallbacks GetUserCallbacks(
user_callbacks.memory.Write16 = &Memory::Write16;
user_callbacks.memory.Write32 = &Memory::Write32;
user_callbacks.memory.Write64 = &Memory::Write64;
- user_callbacks.page_table = Memory::GetCurrentPageTablePointers();
+ // TODO(Subv): Re-add the page table pointers once dynarmic supports switching page tables at
+ // runtime.
+ user_callbacks.page_table = nullptr;
user_callbacks.coprocessors[15] = std::make_shared<DynarmicCP15>(interpeter_state);
return user_callbacks;
}
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 5332318cf..59b8768e7 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -137,7 +137,6 @@ void System::Reschedule() {
}
System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
- Memory::InitMemoryMap();
LOG_DEBUG(HW_Memory, "initialized OK");
if (Settings::values.use_cpu_jit) {
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 54fa5c7fa..e67394177 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -2,14 +2,55 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <algorithm>
#include <cmath>
-#include "common/assert.h"
-#include "core/3ds.h"
-#include "core/core.h"
+#include <mutex>
#include "core/frontend/emu_window.h"
+#include "core/frontend/input.h"
#include "core/settings.h"
+class EmuWindow::TouchState : public Input::Factory<Input::TouchDevice>,
+ public std::enable_shared_from_this<TouchState> {
+public:
+ std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage&) override {
+ return std::make_unique<Device>(shared_from_this());
+ }
+
+ std::mutex mutex;
+
+ bool touch_pressed = false; ///< True if touchpad area is currently pressed, otherwise false
+
+ float touch_x = 0.0f; ///< Touchpad X-position
+ float touch_y = 0.0f; ///< Touchpad Y-position
+
+private:
+ class Device : public Input::TouchDevice {
+ public:
+ explicit Device(std::weak_ptr<TouchState>&& touch_state) : touch_state(touch_state) {}
+ std::tuple<float, float, bool> GetStatus() const override {
+ if (auto state = touch_state.lock()) {
+ std::lock_guard<std::mutex> guard(state->mutex);
+ return std::make_tuple(state->touch_x, state->touch_y, state->touch_pressed);
+ }
+ return std::make_tuple(0.0f, 0.0f, false);
+ }
+
+ private:
+ std::weak_ptr<TouchState> touch_state;
+ };
+};
+
+EmuWindow::EmuWindow() {
+ // TODO: Find a better place to set this.
+ config.min_client_area_size = std::make_pair(400u, 480u);
+ active_config = config;
+ touch_state = std::make_shared<TouchState>();
+ Input::RegisterFactory<Input::TouchDevice>("emu_window", touch_state);
+}
+
+EmuWindow::~EmuWindow() {
+ Input::UnregisterFactory<Input::TouchDevice>("emu_window");
+}
+
/**
* Check if the given x/y coordinates are within the touchpad specified by the framebuffer layout
* @param layout FramebufferLayout object describing the framebuffer size and screen positions
@@ -38,22 +79,26 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) {
if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
return;
- touch_x = Core::kScreenBottomWidth * (framebuffer_x - framebuffer_layout.bottom_screen.left) /
- (framebuffer_layout.bottom_screen.right - framebuffer_layout.bottom_screen.left);
- touch_y = Core::kScreenBottomHeight * (framebuffer_y - framebuffer_layout.bottom_screen.top) /
- (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top);
+ std::lock_guard<std::mutex> guard(touch_state->mutex);
+ touch_state->touch_x =
+ static_cast<float>(framebuffer_x - framebuffer_layout.bottom_screen.left) /
+ (framebuffer_layout.bottom_screen.right - framebuffer_layout.bottom_screen.left);
+ touch_state->touch_y =
+ static_cast<float>(framebuffer_y - framebuffer_layout.bottom_screen.top) /
+ (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top);
- touch_pressed = true;
+ touch_state->touch_pressed = true;
}
void EmuWindow::TouchReleased() {
- touch_pressed = false;
- touch_x = 0;
- touch_y = 0;
+ std::lock_guard<std::mutex> guard(touch_state->mutex);
+ touch_state->touch_pressed = false;
+ touch_state->touch_x = 0;
+ touch_state->touch_y = 0;
}
void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) {
- if (!touch_pressed)
+ if (!touch_state->touch_pressed)
return;
if (!IsWithinTouchscreen(framebuffer_layout, framebuffer_x, framebuffer_y))
diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h
index 7bdee251c..c10dee51b 100644
--- a/src/core/frontend/emu_window.h
+++ b/src/core/frontend/emu_window.h
@@ -4,11 +4,10 @@
#pragma once
-#include <mutex>
+#include <memory>
#include <tuple>
#include <utility>
#include "common/common_types.h"
-#include "common/math_util.h"
#include "core/frontend/framebuffer_layout.h"
/**
@@ -69,17 +68,6 @@ public:
void TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y);
/**
- * Gets the current touch screen state (touch X/Y coordinates and whether or not it is pressed).
- * @note This should be called by the core emu thread to get a state set by the window thread.
- * @todo Fix this function to be thread-safe.
- * @return std::tuple of (x, y, pressed) where `x` and `y` are the touch coordinates and
- * `pressed` is true if the touch screen is currently being pressed
- */
- std::tuple<u16, u16, bool> GetTouchState() const {
- return std::make_tuple(touch_x, touch_y, touch_pressed);
- }
-
- /**
* Returns currently active configuration.
* @note Accesses to the returned object need not be consistent because it may be modified in
* another thread
@@ -113,15 +101,8 @@ public:
void UpdateCurrentFramebufferLayout(unsigned width, unsigned height);
protected:
- EmuWindow() {
- // TODO: Find a better place to set this.
- config.min_client_area_size = std::make_pair(400u, 480u);
- active_config = config;
- touch_x = 0;
- touch_y = 0;
- touch_pressed = false;
- }
- virtual ~EmuWindow() {}
+ EmuWindow();
+ virtual ~EmuWindow();
/**
* Processes any pending configuration changes from the last SetConfig call.
@@ -177,10 +158,8 @@ private:
/// ProcessConfigurationChanges)
WindowConfig active_config; ///< Internal active configuration
- bool touch_pressed; ///< True if touchpad area is currently pressed, otherwise false
-
- u16 touch_x; ///< Touchpad X-position in native 3DS pixel coordinates (0-320)
- u16 touch_y; ///< Touchpad Y-position in native 3DS pixel coordinates (0-240)
+ class TouchState;
+ std::shared_ptr<TouchState> touch_state;
/**
* Clip the provided coordinates to be inside the touchscreen area.
diff --git a/src/core/frontend/input.h b/src/core/frontend/input.h
index 5916a901d..8c256beb5 100644
--- a/src/core/frontend/input.h
+++ b/src/core/frontend/input.h
@@ -126,4 +126,10 @@ using AnalogDevice = InputDevice<std::tuple<float, float>>;
*/
using MotionDevice = InputDevice<std::tuple<Math::Vec3<float>, Math::Vec3<float>>>;
+/**
+ * A touch device is an input device that returns a tuple of two floats and a bool. The floats are
+ * x and y coordinates in the range 0.0 - 1.0, and the bool indicates whether it is pressed.
+ */
+using TouchDevice = InputDevice<std::tuple<float, float, bool>>;
+
} // namespace Input
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp
index 705859f1e..f225c23a5 100644
--- a/src/core/hle/applets/mii_selector.cpp
+++ b/src/core/hle/applets/mii_selector.cpp
@@ -66,7 +66,7 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa
// continue.
MiiResult result;
memset(&result, 0, sizeof(result));
- result.result_code = 0;
+ result.return_code = 0;
// Let the application know that we're closing
Service::APT::MessageParameter message;
@@ -82,5 +82,5 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa
}
void MiiSelector::Update() {}
-}
-} // namespace
+} // namespace Applets
+} // namespace HLE
diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h
index ec00e29d2..136ce8948 100644
--- a/src/core/hle/applets/mii_selector.h
+++ b/src/core/hle/applets/mii_selector.h
@@ -16,51 +16,46 @@ namespace HLE {
namespace Applets {
struct MiiConfig {
- u8 unk_000;
- u8 unk_001;
- u8 unk_002;
- u8 unk_003;
- u8 unk_004;
+ u8 enable_cancel_button;
+ u8 enable_guest_mii;
+ u8 show_on_top_screen;
+ INSERT_PADDING_BYTES(5);
+ u16 title[0x40];
+ INSERT_PADDING_BYTES(4);
+ u8 show_guest_miis;
INSERT_PADDING_BYTES(3);
- u16 unk_008;
- INSERT_PADDING_BYTES(0x82);
- u8 unk_08C;
- INSERT_PADDING_BYTES(3);
- u16 unk_090;
+ u32 initially_selected_mii_index;
+ u8 guest_mii_whitelist[6];
+ u8 user_mii_whitelist[0x64];
INSERT_PADDING_BYTES(2);
- u32 unk_094;
- u16 unk_098;
- u8 unk_09A[0x64];
- u8 unk_0FE;
- u8 unk_0FF;
- u32 unk_100;
+ u32 magic_value;
};
-
static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has incorrect size");
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(MiiConfig, field_name) == position, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(unk_008, 0x08);
-ASSERT_REG_POSITION(unk_08C, 0x8C);
-ASSERT_REG_POSITION(unk_090, 0x90);
-ASSERT_REG_POSITION(unk_094, 0x94);
-ASSERT_REG_POSITION(unk_0FE, 0xFE);
+ASSERT_REG_POSITION(title, 0x08);
+ASSERT_REG_POSITION(show_guest_miis, 0x8C);
+ASSERT_REG_POSITION(initially_selected_mii_index, 0x90);
+ASSERT_REG_POSITION(guest_mii_whitelist, 0x94);
#undef ASSERT_REG_POSITION
struct MiiResult {
- u32 result_code;
- u8 unk_04;
- INSERT_PADDING_BYTES(7);
- u8 unk_0C[0x60];
- u8 unk_6C[0x16];
+ u32 return_code;
+ u32 is_guest_mii_selected;
+ u32 selected_guest_mii_index;
+ // TODO(mailwl): expand to Mii Format structure: https://www.3dbrew.org/wiki/Mii
+ u8 selected_mii_data[0x5C];
INSERT_PADDING_BYTES(2);
+ u16 mii_data_checksum;
+ u16 guest_mii_name[0xC];
};
static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size");
#define ASSERT_REG_POSITION(field_name, position) \
static_assert(offsetof(MiiResult, field_name) == position, \
"Field " #field_name " has invalid position")
-ASSERT_REG_POSITION(unk_0C, 0x0C);
-ASSERT_REG_POSITION(unk_6C, 0x6C);
+ASSERT_REG_POSITION(selected_mii_data, 0x0C);
+ASSERT_REG_POSITION(guest_mii_name, 0x6C);
#undef ASSERT_REG_POSITION
class MiiSelector final : public Applet {
@@ -79,5 +74,5 @@ private:
MiiConfig config;
};
-}
-} // namespace
+} // namespace Applets
+} // namespace HLE
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 496d07cb5..7f27e9655 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -8,7 +8,6 @@
#include <memory>
#include <utility>
#include <vector>
-#include "audio_core/audio_core.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -24,7 +23,7 @@
namespace Kernel {
-static MemoryRegionInfo memory_regions[3];
+MemoryRegionInfo memory_regions[3];
/// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each system
/// memory configuration type.
@@ -96,9 +95,6 @@ MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) {
}
}
-std::array<u8, Memory::VRAM_SIZE> vram;
-std::array<u8, Memory::N3DS_EXTRA_RAM_SIZE> n3ds_extra_ram;
-
void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping) {
using namespace Memory;
@@ -143,30 +139,14 @@ void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mappin
return;
}
- // TODO(yuriks): Use GetPhysicalPointer when that becomes independent of the virtual
- // mappings.
- u8* target_pointer = nullptr;
- switch (area->paddr_base) {
- case VRAM_PADDR:
- target_pointer = vram.data();
- break;
- case DSP_RAM_PADDR:
- target_pointer = AudioCore::GetDspMemory().data();
- break;
- case N3DS_EXTRA_RAM_PADDR:
- target_pointer = n3ds_extra_ram.data();
- break;
- default:
- UNREACHABLE();
- }
+ u8* target_pointer = Memory::GetPhysicalPointer(area->paddr_base + offset_into_region);
// TODO(yuriks): This flag seems to have some other effect, but it's unknown what
MemoryState memory_state = mapping.unk_flag ? MemoryState::Static : MemoryState::IO;
- auto vma = address_space
- .MapBackingMemory(mapping.address, target_pointer + offset_into_region,
- mapping.size, memory_state)
- .Unwrap();
+ auto vma =
+ address_space.MapBackingMemory(mapping.address, target_pointer, mapping.size, memory_state)
+ .Unwrap();
address_space.Reprotect(vma,
mapping.read_only ? VMAPermission::Read : VMAPermission::ReadWrite);
}
diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h
index 08c1a9989..da6bb3563 100644
--- a/src/core/hle/kernel/memory.h
+++ b/src/core/hle/kernel/memory.h
@@ -26,4 +26,6 @@ MemoryRegionInfo* GetMemoryRegion(MemoryRegion region);
void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping);
void MapSharedPages(VMManager& address_space);
+
+extern MemoryRegionInfo memory_regions[3];
} // namespace Kernel
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index b957c45dd..324415a36 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -171,6 +171,8 @@ static void SwitchContext(Thread* new_thread) {
// Cancel any outstanding wakeup events for this thread
CoreTiming::UnscheduleEvent(ThreadWakeupEventType, new_thread->callback_handle);
+ auto previous_process = Kernel::g_current_process;
+
current_thread = new_thread;
ready_queue.remove(new_thread->current_priority, new_thread);
@@ -178,8 +180,18 @@ static void SwitchContext(Thread* new_thread) {
Core::CPU().LoadContext(new_thread->context);
Core::CPU().SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress());
+
+ if (previous_process != current_thread->owner_process) {
+ Kernel::g_current_process = current_thread->owner_process;
+ Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table;
+ // We have switched processes and thus, page tables, clear the instruction cache so we
+ // don't keep stale data from the previous process.
+ Core::CPU().ClearInstructionCache();
+ }
} else {
current_thread = nullptr;
+ // Note: We do not reset the current process and current page table when idling because
+ // technically we haven't changed processes, our threads are just paused.
}
}
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index cef1f7fa8..7a007c065 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -56,6 +56,10 @@ void VMManager::Reset() {
initial_vma.size = MAX_ADDRESS;
vma_map.emplace(initial_vma.base, initial_vma);
+ page_table.pointers.fill(nullptr);
+ page_table.attributes.fill(Memory::PageType::Unmapped);
+ page_table.cached_res_count.fill(0);
+
UpdatePageTableForVMA(initial_vma);
}
@@ -328,16 +332,17 @@ VMManager::VMAIter VMManager::MergeAdjacent(VMAIter iter) {
void VMManager::UpdatePageTableForVMA(const VirtualMemoryArea& vma) {
switch (vma.type) {
case VMAType::Free:
- Memory::UnmapRegion(vma.base, vma.size);
+ Memory::UnmapRegion(page_table, vma.base, vma.size);
break;
case VMAType::AllocatedMemoryBlock:
- Memory::MapMemoryRegion(vma.base, vma.size, vma.backing_block->data() + vma.offset);
+ Memory::MapMemoryRegion(page_table, vma.base, vma.size,
+ vma.backing_block->data() + vma.offset);
break;
case VMAType::BackingMemory:
- Memory::MapMemoryRegion(vma.base, vma.size, vma.backing_memory);
+ Memory::MapMemoryRegion(page_table, vma.base, vma.size, vma.backing_memory);
break;
case VMAType::MMIO:
- Memory::MapIoRegion(vma.base, vma.size, vma.mmio_handler);
+ Memory::MapIoRegion(page_table, vma.base, vma.size, vma.mmio_handler);
break;
}
}
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 38e0d74d0..1302527bb 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,6 +9,7 @@
#include <vector>
#include "common/common_types.h"
#include "core/hle/result.h"
+#include "core/memory.h"
#include "core/mmio.h"
namespace Kernel {
@@ -102,7 +103,6 @@ struct VirtualMemoryArea {
* - http://duartes.org/gustavo/blog/post/page-cache-the-affair-between-memory-and-files/
*/
class VMManager final {
- // TODO(yuriks): Make page tables switchable to support multiple VMManagers
public:
/**
* The maximum amount of address space managed by the kernel. Addresses above this are never
@@ -184,6 +184,10 @@ public:
/// Dumps the address space layout to the log, for debugging
void LogLayout(Log::Level log_level) const;
+ /// Each VMManager has its own page table, which is set as the main one when the owning process
+ /// is scheduled.
+ Memory::PageTable page_table;
+
private:
using VMAIter = decltype(vma_map)::iterator;
diff --git a/src/core/hle/lock.cpp b/src/core/hle/lock.cpp
index 082f689c8..1c24c7ce9 100644
--- a/src/core/hle/lock.cpp
+++ b/src/core/hle/lock.cpp
@@ -7,5 +7,5 @@
#include <core/hle/lock.h>
namespace HLE {
-std::mutex g_hle_lock;
+std::recursive_mutex g_hle_lock;
}
diff --git a/src/core/hle/lock.h b/src/core/hle/lock.h
index 8265621e1..5c99fe996 100644
--- a/src/core/hle/lock.h
+++ b/src/core/hle/lock.h
@@ -14,5 +14,5 @@ namespace HLE {
* to the emulated memory is not protected by this mutex, and should be avoided in any threads other
* than the CPU thread.
*/
-extern std::mutex g_hle_lock;
+extern std::recursive_mutex g_hle_lock;
} // namespace HLE
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 58d94768c..8c0ba73f2 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -19,6 +19,7 @@
#include "core/hle/service/apt/apt_s.h"
#include "core/hle/service/apt/apt_u.h"
#include "core/hle/service/apt/bcfnt/bcfnt.h"
+#include "core/hle/service/cfg/cfg.h"
#include "core/hle/service/fs/archive.h"
#include "core/hle/service/ptm/ptm.h"
#include "core/hle/service/service.h"
@@ -198,6 +199,143 @@ void Initialize(Service::Interface* self) {
Kernel::g_handle_table.Create(slot_data->parameter_event).Unwrap());
}
+/**
+ * Decompresses an LZ11-compressed buffer.
+ *
+ * The stream starts with a 32-bit little-endian word: the low byte is the type tag (must be 0x11)
+ * and the upper 24 bits are the decompressed size. It is followed by flag bytes, consumed from
+ * the most significant bit down; a set bit introduces a back-reference (length/offset pair), a
+ * clear bit a literal byte.
+ *
+ * @param in Pointer to the compressed stream, starting at its 4-byte header.
+ * @param out Destination buffer. The caller has to provide room for the decompressed size.
+ * @returns The decompressed size read from the stream header.
+ */
+static u32 DecompressLZ11(const u8* in, u8* out) {
+    u32_le decompressed_size;
+    memcpy(&decompressed_size, in, sizeof(u32));
+    in += 4;
+
+    u8 type = decompressed_size & 0xFF;
+    ASSERT(type == 0x11);
+    decompressed_size >>= 8;
+    const u32 total_size = decompressed_size;
+
+    u32 current_out_size = 0;
+    // mask starts at 1 so that the first iteration fetches the first flag byte
+    u8 flags = 0, mask = 1;
+    while (current_out_size < total_size) {
+        if (mask == 1) {
+            flags = *(in++);
+            mask = 0x80;
+        } else {
+            mask >>= 1;
+        }
+
+        if (flags & mask) {
+            // Back-reference: the high nibble of the first byte selects the encoding
+            u8 byte1 = *(in++);
+            u32 length = byte1 >> 4;
+            u32 offset;
+            if (length == 0) {
+                // 3-byte form: 8-bit length (+0x11), 12-bit offset
+                u8 byte2 = *(in++);
+                u8 byte3 = *(in++);
+                length = (((byte1 & 0x0F) << 4) | (byte2 >> 4)) + 0x11;
+                offset = (((byte2 & 0x0F) << 8) | byte3) + 0x1;
+            } else if (length == 1) {
+                // 4-byte form: 16-bit length (+0x111), 12-bit offset
+                u8 byte2 = *(in++);
+                u8 byte3 = *(in++);
+                u8 byte4 = *(in++);
+                length = (((byte1 & 0x0F) << 12) | (byte2 << 4) | (byte3 >> 4)) + 0x111;
+                offset = (((byte3 & 0x0F) << 8) | byte4) + 0x1;
+            } else {
+                // 2-byte form: 4-bit length (+1), 12-bit offset
+                u8 byte2 = *(in++);
+                length = (byte1 >> 4) + 0x1;
+                offset = (((byte1 & 0x0F) << 8) | byte2) + 0x1;
+            }
+
+            // A back-reference may only point at bytes that were already produced, otherwise
+            // the copy below would read in front of the output buffer.
+            ASSERT(offset <= current_out_size);
+
+            // Never produce more bytes than the header announced, so a malformed stream cannot
+            // write past the size the caller sized the output for.
+            const u32 remaining = total_size - current_out_size;
+            if (length > remaining)
+                length = remaining;
+
+            // Byte-wise copy on purpose: source and destination may overlap
+            for (u32 i = 0; i < length; i++) {
+                *out = *(out - offset);
+                ++out;
+            }
+
+            current_out_size += length;
+        } else {
+            // Literal byte
+            *(out++) = *(in++);
+            current_out_size++;
+        }
+    }
+    return total_size;
+}
+
+/**
+ * Loads the shared font matching the configured region from the system font archive into
+ * shared_font_mem: a 0x80-byte header followed by the decompressed font data.
+ * @returns true on success, false if the archive, its RomFS file or the font file inside the
+ *          RomFS could not be opened/found.
+ */
+static bool LoadSharedFont() {
+    // Translate the CFG region value into the 1-based font region code
+    u8 font_region_code = 1; // JPN/EUR/USA
+    const u32 region_value = CFG::GetRegionValue();
+    if (region_value == 4) { // CHN
+        font_region_code = 2;
+    } else if (region_value == 5) { // KOR
+        font_region_code = 3;
+    } else if (region_value == 6) { // TWN
+        font_region_code = 4;
+    }
+
+    // Assemble the 16-byte archive id; the zero-based font region is ORed into bits 8 and up of
+    // the low word.
+    const u64_le archive_id_low = 0x0004009b00014002 | ((font_region_code - 1) << 8);
+    const u64_le archive_id_high = 0x00000001ffffff00;
+    std::vector<u8> archive_id(16);
+    std::memcpy(archive_id.data(), &archive_id_low, sizeof(u64));
+    std::memcpy(archive_id.data() + 8, &archive_id_high, sizeof(u64));
+    FileSys::Path archive_path(archive_id);
+    auto archive_result = Service::FS::OpenArchive(Service::FS::ArchiveIdCode::NCCH, archive_path);
+    if (archive_result.Failed()) {
+        return false;
+    }
+
+    // 20-byte all zero path for opening RomFS
+    std::vector<u8> zero_path(20, 0);
+    FileSys::Path file_path(zero_path);
+    FileSys::Mode open_mode = {};
+    open_mode.read_flag.Assign(1);
+    auto file_result = Service::FS::OpenFileFromArchive(*archive_result, file_path, open_mode);
+    if (file_result.Failed()) {
+        return false;
+    }
+
+    // Pull the whole RomFS image into memory, then release the file
+    auto romfs_file = std::move(file_result).Unwrap();
+    std::vector<u8> romfs_data(romfs_file->backend->GetSize());
+    romfs_file->backend->Read(0, romfs_data.size(), romfs_data.data());
+    romfs_file->backend->Close();
+
+    // Font file names, indexed by (font_region_code - 1)
+    const char16_t* font_file_names[4] = {u"cbf_std.bcfnt.lz", u"cbf_zh-Hans-CN.bcfnt.lz",
+                                          u"cbf_ko-Hang-KR.bcfnt.lz", u"cbf_zh-Hant-TW.bcfnt.lz"};
+    const u8* compressed_font =
+        RomFS::GetFilePointer(romfs_data.data(), {font_file_names[font_region_code - 1]});
+    if (compressed_font == nullptr) {
+        return false;
+    }
+
+    struct {
+        u32_le status;
+        u32_le region;
+        u32_le decompressed_size;
+        INSERT_PADDING_WORDS(0x1D);
+    } shared_font_header{};
+    static_assert(sizeof(shared_font_header) == 0x80, "shared_font_header has incorrect size");
+
+    shared_font_header.status = 2; // successfully loaded
+    shared_font_header.region = font_region_code;
+    // The font data is decompressed right behind the header, which is copied in afterwards
+    shared_font_header.decompressed_size =
+        DecompressLZ11(compressed_font, shared_font_mem->GetPointer(0x80));
+    std::memcpy(shared_font_mem->GetPointer(), &shared_font_header, sizeof(shared_font_header));
+    *shared_font_mem->GetPointer(0x83) = 'U'; // Change the magic from "CFNT" to "CFNU"
+
+    return true;
+}
+
+/**
+ * Legacy way of loading the shared font.
+ *
+ * The expected format is a decrypted, uncompressed BCFNT file with the 0x80 byte header
+ * generated by the APT:U service. The best way to get it is by dumping it from RAM. We've
+ * provided a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting
+ * file "shared_font.bin" in the Citra "sysdata" directory.
+ *
+ * @returns true if the file could be opened and was read into shared_font_mem, false otherwise.
+ */
+static bool LoadLegacySharedFont() {
+    const std::string font_path = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT;
+
+    // Create path if not already created
+    FileUtil::CreateFullPath(font_path);
+
+    FileUtil::IOFile font_file(font_path, "rb");
+    if (!font_file.IsOpen()) {
+        return false;
+    }
+
+    // NOTE(review): the whole file is read without comparing its size against the size of the
+    // shared memory block - confirm an oversized file cannot overrun shared_font_mem.
+    font_file.ReadBytes(shared_font_mem->GetPointer(), font_file.GetSize());
+    return true;
+}
+
void GetSharedFont(Service::Interface* self) {
IPC::RequestParser rp(Kernel::GetCommandBuffer(), 0x44, 0, 0); // 0x00440000
IPC::RequestBuilder rb = rp.MakeBuilder(2, 2);
@@ -206,11 +344,20 @@ void GetSharedFont(Service::Interface* self) {
Core::Telemetry().AddField(Telemetry::FieldType::Session, "RequiresSharedFont", true);
if (!shared_font_loaded) {
- LOG_ERROR(Service_APT, "shared font file missing - go dump it from your 3ds");
- rb.Push<u32>(-1); // TODO: Find the right error code
- rb.Skip(1 + 2, true);
- Core::System::GetInstance().SetStatus(Core::System::ResultStatus::ErrorSharedFont);
- return;
+ // On real 3DS, font loading happens on booting. However, we load it on demand to coordinate
+ // with CFG region auto configuration, which happens later than APT initialization.
+ if (LoadSharedFont()) {
+ shared_font_loaded = true;
+ } else if (LoadLegacySharedFont()) {
+ LOG_WARNING(Service_APT, "Loaded shared font by legacy method");
+ shared_font_loaded = true;
+ } else {
+ LOG_ERROR(Service_APT, "shared font file missing - go dump it from your 3ds");
+ rb.Push<u32>(-1); // TODO: Find the right error code
+ rb.Skip(1 + 2, true);
+ Core::System::GetInstance().SetStatus(Core::System::ResultStatus::ErrorSharedFont);
+ return;
+ }
}
// The shared font has to be relocated to the new address before being passed to the
@@ -863,125 +1010,6 @@ void CheckNew3DS(Service::Interface* self) {
LOG_WARNING(Service_APT, "(STUBBED) called");
}
-static u32 DecompressLZ11(const u8* in, u8* out) {
- u32_le decompressed_size;
- memcpy(&decompressed_size, in, sizeof(u32));
- in += 4;
-
- u8 type = decompressed_size & 0xFF;
- ASSERT(type == 0x11);
- decompressed_size >>= 8;
-
- u32 current_out_size = 0;
- u8 flags = 0, mask = 1;
- while (current_out_size < decompressed_size) {
- if (mask == 1) {
- flags = *(in++);
- mask = 0x80;
- } else {
- mask >>= 1;
- }
-
- if (flags & mask) {
- u8 byte1 = *(in++);
- u32 length = byte1 >> 4;
- u32 offset;
- if (length == 0) {
- u8 byte2 = *(in++);
- u8 byte3 = *(in++);
- length = (((byte1 & 0x0F) << 4) | (byte2 >> 4)) + 0x11;
- offset = (((byte2 & 0x0F) << 8) | byte3) + 0x1;
- } else if (length == 1) {
- u8 byte2 = *(in++);
- u8 byte3 = *(in++);
- u8 byte4 = *(in++);
- length = (((byte1 & 0x0F) << 12) | (byte2 << 4) | (byte3 >> 4)) + 0x111;
- offset = (((byte3 & 0x0F) << 8) | byte4) + 0x1;
- } else {
- u8 byte2 = *(in++);
- length = (byte1 >> 4) + 0x1;
- offset = (((byte1 & 0x0F) << 8) | byte2) + 0x1;
- }
-
- for (u32 i = 0; i < length; i++) {
- *out = *(out - offset);
- ++out;
- }
-
- current_out_size += length;
- } else {
- *(out++) = *(in++);
- current_out_size++;
- }
- }
- return decompressed_size;
-}
-
-static bool LoadSharedFont() {
- // TODO (wwylele): load different font archive for region CHN/KOR/TWN
- const u64_le shared_font_archive_id_low = 0x0004009b00014002;
- const u64_le shared_font_archive_id_high = 0x00000001ffffff00;
- std::vector<u8> shared_font_archive_id(16);
- std::memcpy(&shared_font_archive_id[0], &shared_font_archive_id_low, sizeof(u64));
- std::memcpy(&shared_font_archive_id[8], &shared_font_archive_id_high, sizeof(u64));
- FileSys::Path archive_path(shared_font_archive_id);
- auto archive_result = Service::FS::OpenArchive(Service::FS::ArchiveIdCode::NCCH, archive_path);
- if (archive_result.Failed())
- return false;
-
- std::vector<u8> romfs_path(20, 0); // 20-byte all zero path for opening RomFS
- FileSys::Path file_path(romfs_path);
- FileSys::Mode open_mode = {};
- open_mode.read_flag.Assign(1);
- auto file_result = Service::FS::OpenFileFromArchive(*archive_result, file_path, open_mode);
- if (file_result.Failed())
- return false;
-
- auto romfs = std::move(file_result).Unwrap();
- std::vector<u8> romfs_buffer(romfs->backend->GetSize());
- romfs->backend->Read(0, romfs_buffer.size(), romfs_buffer.data());
- romfs->backend->Close();
-
- const u8* font_file = RomFS::GetFilePointer(romfs_buffer.data(), {u"cbf_std.bcfnt.lz"});
- if (font_file == nullptr)
- return false;
-
- struct {
- u32_le status;
- u32_le region;
- u32_le decompressed_size;
- INSERT_PADDING_WORDS(0x1D);
- } shared_font_header{};
- static_assert(sizeof(shared_font_header) == 0x80, "shared_font_header has incorrect size");
-
- shared_font_header.status = 2; // successfully loaded
- shared_font_header.region = 1; // region JPN/EUR/USA
- shared_font_header.decompressed_size =
- DecompressLZ11(font_file, shared_font_mem->GetPointer(0x80));
- std::memcpy(shared_font_mem->GetPointer(), &shared_font_header, sizeof(shared_font_header));
- *shared_font_mem->GetPointer(0x83) = 'U'; // Change the magic from "CFNT" to "CFNU"
-
- return true;
-}
-
-static bool LoadLegacySharedFont() {
- // This is the legacy method to load shared font.
- // The expected format is a decrypted, uncompressed BCFNT file with the 0x80 byte header
- // generated by the APT:U service. The best way to get is by dumping it from RAM. We've provided
- // a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file
- // "shared_font.bin" in the Citra "sysdata" directory.
- std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT;
-
- FileUtil::CreateFullPath(filepath); // Create path if not already created
- FileUtil::IOFile file(filepath, "rb");
- if (file.IsOpen()) {
- file.ReadBytes(shared_font_mem->GetPointer(), file.GetSize());
- return true;
- }
-
- return false;
-}
-
void Init() {
AddService(new APT_A_Interface);
AddService(new APT_S_Interface);
@@ -995,16 +1023,6 @@ void Init() {
MemoryPermission::ReadWrite, MemoryPermission::Read, 0,
Kernel::MemoryRegion::SYSTEM, "APT:SharedFont");
- if (LoadSharedFont()) {
- shared_font_loaded = true;
- } else if (LoadLegacySharedFont()) {
- LOG_WARNING(Service_APT, "Loaded shared font by legacy method");
- shared_font_loaded = true;
- } else {
- LOG_WARNING(Service_APT, "Unable to load shared font");
- shared_font_loaded = false;
- }
-
lock = Kernel::Mutex::Create(false, "APT_U:Lock");
cpu_percent = 0;
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 3dbeb27cc..f26a1f65f 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -168,7 +168,7 @@ void GetCountryCodeID(Service::Interface* self) {
cmd_buff[2] = country_code_id;
}
-static u32 GetRegionValue() {
+u32 GetRegionValue() {
if (Settings::values.region_value == Settings::REGION_VALUE_AUTO_SELECT)
return preferred_region_code;
diff --git a/src/core/hle/service/cfg/cfg.h b/src/core/hle/service/cfg/cfg.h
index 1659ebf32..282b6936b 100644
--- a/src/core/hle/service/cfg/cfg.h
+++ b/src/core/hle/service/cfg/cfg.h
@@ -101,6 +101,8 @@ void GetCountryCodeString(Service::Interface* self);
*/
void GetCountryCodeID(Service::Interface* self);
+/// Returns the current region value. When the region setting is REGION_VALUE_AUTO_SELECT this is
+/// the preferred region code; see the definition in cfg.cpp for the other case.
+u32 GetRegionValue();
+
/**
* CFG::SecureInfoGetRegion service function
* Inputs:
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 31f34a7ae..aa5d821f9 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -7,9 +7,9 @@
#include <cmath>
#include <memory>
#include "common/logging/log.h"
+#include "core/3ds.h"
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/frontend/emu_window.h"
#include "core/frontend/input.h"
#include "core/hle/ipc.h"
#include "core/hle/kernel/event.h"
@@ -19,7 +19,6 @@
#include "core/hle/service/hid/hid_spvr.h"
#include "core/hle/service/hid/hid_user.h"
#include "core/hle/service/service.h"
-#include "video_core/video_core.h"
namespace Service {
namespace HID {
@@ -59,6 +58,7 @@ static std::array<std::unique_ptr<Input::ButtonDevice>, Settings::NativeButton::
buttons;
static std::unique_ptr<Input::AnalogDevice> circle_pad;
static std::unique_ptr<Input::MotionDevice> motion_device;
+static std::unique_ptr<Input::TouchDevice> touch_device;
DirectionState GetStickDirectionState(s16 circle_pad_x, s16 circle_pad_y) {
// 30 degree and 60 degree are angular thresholds for directions
@@ -96,6 +96,7 @@ static void LoadInputDevices() {
circle_pad = Input::CreateDevice<Input::AnalogDevice>(
Settings::values.analogs[Settings::NativeAnalog::CirclePad]);
motion_device = Input::CreateDevice<Input::MotionDevice>(Settings::values.motion_device);
+ touch_device = Input::CreateDevice<Input::TouchDevice>(Settings::values.touch_device);
}
static void UnloadInputDevices() {
@@ -104,6 +105,7 @@ static void UnloadInputDevices() {
}
circle_pad.reset();
motion_device.reset();
+ touch_device.reset();
}
static void UpdatePadCallback(u64 userdata, int cycles_late) {
@@ -172,8 +174,10 @@ static void UpdatePadCallback(u64 userdata, int cycles_late) {
// Get the current touch entry
TouchDataEntry& touch_entry = mem->touch.entries[mem->touch.index];
bool pressed = false;
-
- std::tie(touch_entry.x, touch_entry.y, pressed) = VideoCore::g_emu_window->GetTouchState();
+ float x, y;
+ std::tie(x, y, pressed) = touch_device->GetStatus();
+ touch_entry.x = static_cast<u16>(x * Core::kScreenBottomWidth);
+ touch_entry.y = static_cast<u16>(y * Core::kScreenBottomHeight);
touch_entry.valid.Assign(pressed ? 1 : 0);
// TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which
diff --git a/src/core/hle/service/nwm/nwm_uds.cpp b/src/core/hle/service/nwm/nwm_uds.cpp
index 6dbdff044..893bbb1e7 100644
--- a/src/core/hle/service/nwm/nwm_uds.cpp
+++ b/src/core/hle/service/nwm/nwm_uds.cpp
@@ -4,6 +4,7 @@
#include <array>
#include <cstring>
+#include <mutex>
#include <unordered_map>
#include <vector>
#include "common/common_types.h"
@@ -15,8 +16,10 @@
#include "core/hle/result.h"
#include "core/hle/service/nwm/nwm_uds.h"
#include "core/hle/service/nwm/uds_beacon.h"
+#include "core/hle/service/nwm/uds_connection.h"
#include "core/hle/service/nwm/uds_data.h"
#include "core/memory.h"
+#include "network/network.h"
namespace Service {
namespace NWM {
@@ -51,6 +54,135 @@ static NetworkInfo network_info;
// Event that will generate and send the 802.11 beacon frames.
static int beacon_broadcast_event;
+// Mutex to synchronize access to the list of received beacons between the emulation thread and the
+// network thread.
+static std::mutex beacon_mutex;
+
+// Number of beacons to store before we start dropping the old ones.
+// TODO(Subv): Find a more accurate value for this limit.
+constexpr size_t MaxBeaconFrames = 15;
+
+// List of the last <MaxBeaconFrames> beacons received from the network.
+static std::deque<Network::WifiPacket> received_beacons;
+
+/**
+ * Returns the 802.11 beacon frames received since the last call and empties the internal queue.
+ * @param sender Mac address of the desired sender. Currently unused: the beacons of every sender
+ *               are returned.
+ * @returns The queued beacons, oldest first.
+ */
+std::deque<Network::WifiPacket> GetReceivedBeacons(const MacAddress& sender) {
+    std::lock_guard<std::mutex> lock(beacon_mutex);
+    // TODO(Subv): Filter by sender.
+
+    // Swap the queue into a local instead of moving straight out of the global: a moved-from
+    // container is only guaranteed to be in a valid but unspecified state, whereas the swap
+    // leaves received_beacons empty by construction.
+    std::deque<Network::WifiPacket> beacons;
+    beacons.swap(received_beacons);
+    return beacons;
+}
+
+/// Sends a WifiPacket to the room we're currently connected to.
+/// Not implemented yet: the body is empty, so calling this currently has no effect.
+/// @param packet The packet to send.
+void SendPacket(Network::WifiPacket& packet) {
+ // TODO(Subv): Implement.
+}
+
+/**
+ * Queues a received beacon frame and evicts the oldest queued beacon once more than
+ * MaxBeaconFrames are stored.
+ * @param packet The received beacon packet; a copy of it is stored.
+ */
+void HandleBeaconFrame(const Network::WifiPacket& packet) {
+    std::lock_guard<std::mutex> guard(beacon_mutex);
+
+    received_beacons.push_back(packet);
+
+    // Discard the oldest beacon if the buffer is over its limit.
+    const bool over_limit = received_beacons.size() > MaxBeaconFrames;
+    if (over_limit) {
+        received_beacons.pop_front();
+    }
+}
+
+/*
+ * Returns the lowest index in the nodes array of the currently-hosted UDS network whose bit is
+ * not set in the connection status' node bitmask.
+ * Asserts that we are hosting a network and that a free slot exists. If the latter assertion
+ * does not abort, connection_status.max_nodes (one past the last valid index) is returned.
+ */
+static u16 GetNextAvailableNodeId() {
+    ASSERT_MSG(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost),
+               "Can not accept clients if we're not hosting a network");
+
+    for (u16 index = 0; index < connection_status.max_nodes; ++index) {
+        if ((connection_status.node_bitmask & (1 << index)) == 0)
+            return index;
+    }
+
+    // Any connection attempts to an already full network should have been refused.
+    ASSERT_MSG(false, "No available connection slots in the network");
+
+    // Flowing off the end of a non-void function is undefined behavior, so return an explicit
+    // out-of-range id in case the assertion above returns.
+    return static_cast<u16>(connection_status.max_nodes);
+}
+
+/*
+ * Kicks off the connection sequence with an UDS server by sending it an 802.11 authentication
+ * frame carrying SEQ1. Must only be called while not connected.
+ */
+void StartConnectionSequence(const MacAddress& server) {
+    ASSERT(connection_status.status == static_cast<u32>(NetworkStatus::NotConnected));
+
+    // TODO(Subv): Handle timeout.
+
+    // Build the SEQ1 authentication request addressed to the server
+    Network::WifiPacket seq1_packet;
+    seq1_packet.type = Network::WifiPacket::PacketType::Authentication;
+    seq1_packet.destination_address = server;
+    seq1_packet.channel = network_channel;
+    seq1_packet.data = GenerateAuthenticationFrame(AuthenticationSeq::SEQ1);
+
+    SendPacket(seq1_packet);
+}
+
+/// Sends an Association Response frame with a successful status to the specified mac address.
+/// Must only be called while hosting a network.
+/// @param address Mac address of the client the response is sent to.
+void SendAssociationResponseFrame(const MacAddress& address) {
+    // Plain ASSERT: there is no message to format, so ASSERT_MSG is the wrong macro here.
+    ASSERT(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost));
+
+    using Network::WifiPacket;
+    WifiPacket assoc_response;
+    assoc_response.channel = network_channel;
+    // TODO(Subv): This will cause multiple clients to end up with the same association id, but
+    // we're not using that for anything.
+    u16 association_id = 1;
+    assoc_response.data = GenerateAssocResponseFrame(AssocStatus::Successful, association_id,
+                                                     network_info.network_id);
+    assoc_response.destination_address = address;
+    assoc_response.type = WifiPacket::PacketType::AssociationResponse;
+
+    SendPacket(assoc_response);
+}
+
+/*
+ * Handles the authentication request frame and sends the authentication response and association
+ * response frames. Once an Authentication frame with SEQ1 is received by the server, it responds
+ * with an Authentication frame containing SEQ2, and immediately sends an Association response frame
+ * containing the details of the access point and the assigned association id for the new client.
+ * Frames with any other sequence number are ignored.
+ */
+void HandleAuthenticationFrame(const Network::WifiPacket& packet) {
+    // Only the SEQ1 auth frame is handled here, the SEQ2 frame doesn't need any special behavior
+    if (GetAuthenticationSeqNumber(packet.data) != AuthenticationSeq::SEQ1)
+        return;
+
+    // Plain ASSERT: there is no message to format, so ASSERT_MSG is the wrong macro here.
+    ASSERT(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost));
+
+    // Respond with an authentication response frame with SEQ2
+    using Network::WifiPacket;
+    WifiPacket auth_response;
+    auth_response.channel = network_channel;
+    auth_response.data = GenerateAuthenticationFrame(AuthenticationSeq::SEQ2);
+    auth_response.destination_address = packet.transmitter_address;
+    auth_response.type = WifiPacket::PacketType::Authentication;
+
+    SendPacket(auth_response);
+
+    SendAssociationResponseFrame(packet.transmitter_address);
+}
+
+/// Callback to parse and handle a received wifi packet. Beacon and Authentication packets are
+/// dispatched to their handlers, every other packet type is ignored.
+void OnWifiPacketReceived(const Network::WifiPacket& packet) {
+    using PacketType = Network::WifiPacket::PacketType;
+
+    if (packet.type == PacketType::Beacon) {
+        HandleBeaconFrame(packet);
+    } else if (packet.type == PacketType::Authentication) {
+        HandleAuthenticationFrame(packet);
+    }
+}
+
/**
* NWM_UDS::Shutdown service function
* Inputs:
@@ -111,8 +243,7 @@ static void RecvBeaconBroadcastData(Interface* self) {
u32 total_size = sizeof(BeaconDataReplyHeader);
// Retrieve all beacon frames that were received from the desired mac address.
- std::deque<WifiPacket> beacons =
- GetReceivedPackets(WifiPacket::PacketType::Beacon, mac_address);
+ auto beacons = GetReceivedBeacons(mac_address);
BeaconDataReplyHeader data_reply_header{};
data_reply_header.total_entries = beacons.size();
@@ -193,6 +324,9 @@ static void InitializeWithVersion(Interface* self) {
rb.Push(RESULT_SUCCESS);
rb.PushCopyHandles(Kernel::g_handle_table.Create(connection_status_event).Unwrap());
+ // TODO(Subv): Connect the OnWifiPacketReceived function to the wifi packet received callback of
+ // the room we're currently in.
+
LOG_DEBUG(Service_NWM, "called sharedmem_size=0x%08X, version=0x%08X, sharedmem_handle=0x%08X",
sharedmem_size, version, sharedmem_handle);
}
@@ -610,32 +744,23 @@ static void BeaconBroadcastCallback(u64 userdata, int cycles_late) {
if (connection_status.status != static_cast<u32>(NetworkStatus::ConnectedAsHost))
return;
- // TODO(Subv): Actually send the beacon.
std::vector<u8> frame = GenerateBeaconFrame(network_info, node_info);
+ using Network::WifiPacket;
+ WifiPacket packet;
+ packet.type = WifiPacket::PacketType::Beacon;
+ packet.data = std::move(frame);
+ packet.destination_address = Network::BroadcastMac;
+ packet.channel = network_channel;
+
+ SendPacket(packet);
+
// Start broadcasting the network, send a beacon frame every 102.4ms.
CoreTiming::ScheduleEvent(msToCycles(DefaultBeaconInterval * MillisecondsPerTU) - cycles_late,
beacon_broadcast_event, 0);
}
/*
- * Returns an available index in the nodes array for the
- * currently-hosted UDS network.
- */
-static u32 GetNextAvailableNodeId() {
- ASSERT_MSG(connection_status.status == static_cast<u32>(NetworkStatus::ConnectedAsHost),
- "Can not accept clients if we're not hosting a network");
-
- for (unsigned index = 0; index < connection_status.max_nodes; ++index) {
- if ((connection_status.node_bitmask & (1 << index)) == 0)
- return index;
- }
-
- // Any connection attempts to an already full network should have been refused.
- ASSERT_MSG(false, "No available connection slots in the network");
-}
-
-/*
* Called when a client connects to an UDS network we're hosting,
* updates the connection status and signals the update event.
* @param network_node_id Network Node Id of the connecting client.
diff --git a/src/core/hle/service/nwm/nwm_uds.h b/src/core/hle/service/nwm/nwm_uds.h
index 141f49f9c..f1caaf974 100644
--- a/src/core/hle/service/nwm/nwm_uds.h
+++ b/src/core/hle/service/nwm/nwm_uds.h
@@ -42,6 +42,7 @@ using NodeList = std::vector<NodeInfo>;
enum class NetworkStatus {
NotConnected = 3,
ConnectedAsHost = 6,
+ Connecting = 7,
ConnectedAsClient = 9,
ConnectedAsSpectator = 10,
};
@@ -85,6 +86,17 @@ static_assert(offsetof(NetworkInfo, oui_value) == 0xC, "oui_value is at the wron
static_assert(offsetof(NetworkInfo, wlan_comm_id) == 0x10, "wlancommid is at the wrong offset.");
static_assert(sizeof(NetworkInfo) == 0x108, "NetworkInfo has incorrect size.");
+/// Additional block tag ids in the Beacon and Association Response frames.
+/// Each value is the one-byte id that starts a tagged block; the underlying type is u8.
+enum class TagId : u8 {
+ SSID = 0,
+ SupportedRates = 1,
+ // NOTE(review): IEEE 802.11 appears to assign element id 3 to the DS(SS) Parameter Set and
+ // id 2 to the FH Parameter Set - confirm that 2 is intentional here.
+ DSParameterSet = 2,
+ TrafficIndicationMap = 5,
+ CountryInformation = 7,
+ ERPInformation = 42,
+ VendorSpecific = 221
+};
+
class NWM_UDS final : public Interface {
public:
NWM_UDS();
diff --git a/src/core/hle/service/nwm/uds_beacon.cpp b/src/core/hle/service/nwm/uds_beacon.cpp
index 6332b404c..552eaf65e 100644
--- a/src/core/hle/service/nwm/uds_beacon.cpp
+++ b/src/core/hle/service/nwm/uds_beacon.cpp
@@ -325,8 +325,5 @@ std::vector<u8> GenerateBeaconFrame(const NetworkInfo& network_info, const NodeL
return buffer;
}
-std::deque<WifiPacket> GetReceivedPackets(WifiPacket::PacketType type, const MacAddress& sender) {
- return {};
-}
} // namespace NWM
} // namespace Service
diff --git a/src/core/hle/service/nwm/uds_beacon.h b/src/core/hle/service/nwm/uds_beacon.h
index caacf4c6f..50cc76da2 100644
--- a/src/core/hle/service/nwm/uds_beacon.h
+++ b/src/core/hle/service/nwm/uds_beacon.h
@@ -17,17 +17,6 @@ namespace NWM {
using MacAddress = std::array<u8, 6>;
constexpr std::array<u8, 3> NintendoOUI = {0x00, 0x1F, 0x32};
-/// Additional block tag ids in the Beacon frames
-enum class TagId : u8 {
- SSID = 0,
- SupportedRates = 1,
- DSParameterSet = 2,
- TrafficIndicationMap = 5,
- CountryInformation = 7,
- ERPInformation = 42,
- VendorSpecific = 221
-};
-
/**
* Internal vendor-specific tag ids as stored inside
* VendorSpecific blocks in the Beacon frames.
@@ -135,20 +124,6 @@ struct BeaconData {
static_assert(sizeof(BeaconData) == 0x12, "BeaconData has incorrect size.");
-/// Information about a received WiFi packet.
-/// Acts as our own 802.11 header.
-struct WifiPacket {
- enum class PacketType { Beacon, Data };
-
- PacketType type; ///< The type of 802.11 frame, Beacon / Data.
-
- /// Raw 802.11 frame data, starting at the management frame header for management frames.
- std::vector<u8> data;
- MacAddress transmitter_address; ///< Mac address of the transmitter.
- MacAddress destination_address; ///< Mac address of the receiver.
- u8 channel; ///< WiFi channel where this frame was transmitted.
-};
-
/**
* Decrypts the beacon data buffer for the network described by `network_info`.
*/
@@ -161,10 +136,5 @@ void DecryptBeaconData(const NetworkInfo& network_info, std::vector<u8>& buffer)
*/
std::vector<u8> GenerateBeaconFrame(const NetworkInfo& network_info, const NodeList& nodes);
-/**
- * Returns a list of received 802.11 frames from the specified sender
- * matching the type since the last call.
- */
-std::deque<WifiPacket> GetReceivedPackets(WifiPacket::PacketType type, const MacAddress& sender);
} // namespace NWM
} // namespace Service
diff --git a/src/core/hle/service/nwm/uds_connection.cpp b/src/core/hle/service/nwm/uds_connection.cpp
new file mode 100644
index 000000000..c8a76ec2a
--- /dev/null
+++ b/src/core/hle/service/nwm/uds_connection.cpp
@@ -0,0 +1,79 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "core/hle/service/nwm/nwm_uds.h"
+#include "core/hle/service/nwm/uds_connection.h"
+#include "fmt/format.h"
+
+namespace Service {
+namespace NWM {
+
+// Note: These values were taken from a packet capture of an o3DS XL
+// broadcasting a Super Smash Bros. 4 lobby.
+constexpr u16 DefaultExtraCapabilities = 0x0431;
+
+/// Builds the body of an 802.11 authentication frame with the given sequence number; the other
+/// fields keep the defaults of AuthenticationFrame. Returns the frame's raw bytes.
+std::vector<u8> GenerateAuthenticationFrame(AuthenticationSeq seq) {
+    AuthenticationFrame auth_frame{};
+    auth_frame.auth_seq = static_cast<u16>(seq);
+
+    // Serialize the frame by copying its raw bytes into the returned buffer
+    const auto* raw = reinterpret_cast<const u8*>(&auth_frame);
+    return std::vector<u8>(raw, raw + sizeof(auth_frame));
+}
+
+/// Extracts the sequence number from the body of an Authentication frame.
+/// If the body is shorter than an AuthenticationFrame nothing is read from it and the returned
+/// value is 0, which matches neither SEQ1 nor SEQ2.
+AuthenticationSeq GetAuthenticationSeqNumber(const std::vector<u8>& body) {
+    // Value-initialized so that auth_seq is 0 when the body is too short to be copied
+    AuthenticationFrame frame{};
+
+    // The body comes from a received packet: never read past its end
+    if (body.size() >= sizeof(frame))
+        std::memcpy(&frame, body.data(), sizeof(frame));
+
+    return static_cast<AuthenticationSeq>(frame.auth_seq);
+}
+
+/**
+ * Builds the SSID tag of an 802.11 frame: a two-byte header (tag id, value size) followed by the
+ * network id rendered as 8 upper-case, zero-padded hexadecimal characters.
+ * @param network_id The network id to use.
+ * @returns A buffer with the SSID tag.
+ */
+static std::vector<u8> GenerateSSIDTag(u32 network_id) {
+    constexpr u8 SSIDSize = 8;
+    constexpr size_t HeaderSize = 2; // tag id + value size, one byte each
+
+    std::vector<u8> tag;
+    tag.reserve(HeaderSize + SSIDSize);
+
+    // Tag header
+    tag.push_back(static_cast<u8>(TagId::SSID));
+    tag.push_back(SSIDSize);
+
+    // Tag value: the textual network id
+    const std::string ssid = fmt::format("{0:08X}", network_id);
+    tag.insert(tag.end(), ssid.begin(), ssid.begin() + SSIDSize);
+
+    return tag;
+}
+
+/// Builds the body of an 802.11 association response frame: the fixed AssociationResponseFrame
+/// fields followed by the SSID tag generated from network_id.
+std::vector<u8> GenerateAssocResponseFrame(AssocStatus status, u16 association_id, u32 network_id) {
+    // The association id is ORed with this magic value (0xC000)
+    constexpr u16 AssociationIdMagic = 0xC000;
+
+    AssociationResponseFrame response{};
+    response.capabilities = DefaultExtraCapabilities;
+    response.status_code = static_cast<u16>(status);
+    response.assoc_id = association_id | AssociationIdMagic;
+
+    // Fixed fields first ...
+    const auto* raw = reinterpret_cast<const u8*>(&response);
+    std::vector<u8> frame_data(raw, raw + sizeof(response));
+
+    // ... then the tagged blocks
+    const std::vector<u8> ssid_tag = GenerateSSIDTag(network_id);
+    frame_data.insert(frame_data.end(), ssid_tag.begin(), ssid_tag.end());
+
+    // TODO(Subv): Add the SupportedRates tag.
+    // TODO(Subv): Add the DSParameterSet tag.
+    // TODO(Subv): Add the ERPInformation tag.
+    return frame_data;
+}
+
+} // namespace NWM
+} // namespace Service
diff --git a/src/core/hle/service/nwm/uds_connection.h b/src/core/hle/service/nwm/uds_connection.h
new file mode 100644
index 000000000..73f55a4fd
--- /dev/null
+++ b/src/core/hle/service/nwm/uds_connection.h
@@ -0,0 +1,51 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+#include "common/common_types.h"
+#include "common/swap.h"
+#include "core/hle/service/service.h"
+
+namespace Service {
+namespace NWM {
+
+/// Sequence number of the 802.11 authentication frames.
+enum class AuthenticationSeq : u16 { SEQ1 = 1, SEQ2 = 2 };
+
+/// Values for the auth_algorithm field of AuthenticationFrame.
+enum class AuthAlgorithm : u16 { OpenSystem = 0 };
+
+/// Values for the status_code field of AuthenticationFrame.
+enum class AuthStatus : u16 { Successful = 0 };
+
+/// Status values passed to GenerateAssocResponseFrame for the association response frame.
+enum class AssocStatus : u16 { Successful = 0 };
+
+/// Body of an 802.11 authentication frame: three u16_le fields, 6 bytes in total.
+struct AuthenticationFrame {
+ /// Authentication algorithm, defaults to AuthAlgorithm::OpenSystem.
+ u16_le auth_algorithm = static_cast<u16>(AuthAlgorithm::OpenSystem);
+ /// Sequence number of the frame, see AuthenticationSeq. Has no default value.
+ u16_le auth_seq;
+ /// Status code, defaults to AuthStatus::Successful.
+ u16_le status_code = static_cast<u16>(AuthStatus::Successful);
+};
+
+static_assert(sizeof(AuthenticationFrame) == 6, "AuthenticationFrame has wrong size");
+
+/// Fixed fields at the start of an 802.11 association response frame body: three u16_le
+/// fields, 6 bytes in total.
+struct AssociationResponseFrame {
+ /// Capabilities field of the frame.
+ u16_le capabilities;
+ /// Status code of the association, see AssocStatus.
+ u16_le status_code;
+ /// Association id assigned to the client.
+ u16_le assoc_id;
+};
+
+static_assert(sizeof(AssociationResponseFrame) == 6, "AssociationResponseFrame has wrong size");
+
+/// Generates an 802.11 authentication frame, starting at the frame body.
+std::vector<u8> GenerateAuthenticationFrame(AuthenticationSeq seq);
+
+/// Returns the sequence number from the body of an Authentication frame.
+AuthenticationSeq GetAuthenticationSeqNumber(const std::vector<u8>& body);
+
+/// Generates an 802.11 association response frame with the specified status, association id and
+/// network id, starting at the frame body.
+std::vector<u8> GenerateAssocResponseFrame(AssocStatus status, u16 association_id, u32 network_id);
+
+} // namespace NWM
+} // namespace Service
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index b98938cb4..dfc36748c 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -1334,7 +1334,7 @@ void CallSVC(u32 immediate) {
MICROPROFILE_SCOPE(Kernel_SVC);
// Lock the global kernel mutex when we enter the kernel HLE.
- std::lock_guard<std::mutex> lock(HLE::g_hle_lock);
+ std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
const FunctionDef* info = GetSVCInfo(immediate);
if (info) {
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 74e336487..69cdc0867 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -270,6 +270,7 @@ ResultStatus AppLoader_THREEDSX::Load() {
Kernel::g_current_process = Kernel::Process::Create(std::move(codeset));
Kernel::g_current_process->svc_access_mask.set();
Kernel::g_current_process->address_mappings = default_address_mappings;
+ Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table;
// Attach the default resource limit (APPLICATION) to the process
Kernel::g_current_process->resource_limit =
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index cfcde9167..2f27606a1 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -397,6 +397,7 @@ ResultStatus AppLoader_ELF::Load() {
Kernel::g_current_process = Kernel::Process::Create(std::move(codeset));
Kernel::g_current_process->svc_access_mask.set();
Kernel::g_current_process->address_mappings = default_address_mappings;
+ Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table;
// Attach the default resource limit (APPLICATION) to the process
Kernel::g_current_process->resource_limit =
diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp
index 7aff7f29b..79ea50147 100644
--- a/src/core/loader/ncch.cpp
+++ b/src/core/loader/ncch.cpp
@@ -172,6 +172,7 @@ ResultStatus AppLoader_NCCH::LoadExec() {
codeset->memory = std::make_shared<std::vector<u8>>(std::move(code));
Kernel::g_current_process = Kernel::Process::Create(std::move(codeset));
+ Memory::current_page_table = &Kernel::g_current_process->vm_manager.page_table;
// Attach a resource limit to the process based on the resource limit category
Kernel::g_current_process->resource_limit =
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index a3c5f4a9d..68a6b1ac2 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -4,83 +4,31 @@
#include <array>
#include <cstring>
+#include "audio_core/audio_core.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/swap.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/lock.h"
#include "core/memory.h"
#include "core/memory_setup.h"
-#include "core/mmio.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
namespace Memory {
-enum class PageType {
- /// Page is unmapped and should cause an access error.
- Unmapped,
- /// Page is mapped to regular memory. This is the only type you can get pointers to.
- Memory,
- /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
- /// invalidation
- RasterizerCachedMemory,
- /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
- Special,
- /// Page is mapped to a I/O region, but also needs to check for rasterizer cache flushing and
- /// invalidation
- RasterizerCachedSpecial,
-};
-
-struct SpecialRegion {
- VAddr base;
- u32 size;
- MMIORegionPointer handler;
-};
+static std::array<u8, Memory::VRAM_SIZE> vram;
+static std::array<u8, Memory::N3DS_EXTRA_RAM_SIZE> n3ds_extra_ram;
-/**
- * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
- * mimics the way a real CPU page table works, but instead is optimized for minimal decoding and
- * fetching requirements when accessing. In the usual case of an access to regular memory, it only
- * requires an indexed fetch and a check for NULL.
- */
-struct PageTable {
- /**
- * Array of memory pointers backing each page. An entry can only be non-null if the
- * corresponding entry in the `attributes` array is of type `Memory`.
- */
- std::array<u8*, PAGE_TABLE_NUM_ENTRIES> pointers;
-
- /**
- * Contains MMIO handlers that back memory regions whose entries in the `attribute` array is of
- * type `Special`.
- */
- std::vector<SpecialRegion> special_regions;
-
- /**
- * Array of fine grained page attributes. If it is set to any value other than `Memory`, then
- * the corresponding entry in `pointers` MUST be set to null.
- */
- std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
-
- /**
- * Indicates the number of externally cached resources touching a page that should be
- * flushed before the memory is accessed
- */
- std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count;
-};
-
-/// Singular page table used for the singleton process
-static PageTable main_page_table;
-/// Currently active page table
-static PageTable* current_page_table = &main_page_table;
+PageTable* current_page_table = nullptr;
std::array<u8*, PAGE_TABLE_NUM_ENTRIES>* GetCurrentPageTablePointers() {
return &current_page_table->pointers;
}
-static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
+static void MapPages(PageTable& page_table, u32 base, u32 size, u8* memory, PageType type) {
LOG_DEBUG(HW_Memory, "Mapping %p onto %08X-%08X", memory, base * PAGE_SIZE,
(base + size) * PAGE_SIZE);
@@ -91,9 +39,9 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
while (base != end) {
ASSERT_MSG(base < PAGE_TABLE_NUM_ENTRIES, "out of range mapping at %08X", base);
- current_page_table->attributes[base] = type;
- current_page_table->pointers[base] = memory;
- current_page_table->cached_res_count[base] = 0;
+ page_table.attributes[base] = type;
+ page_table.pointers[base] = memory;
+ page_table.cached_res_count[base] = 0;
base += 1;
if (memory != nullptr)
@@ -101,30 +49,24 @@ static void MapPages(u32 base, u32 size, u8* memory, PageType type) {
}
}
-void InitMemoryMap() {
- main_page_table.pointers.fill(nullptr);
- main_page_table.attributes.fill(PageType::Unmapped);
- main_page_table.cached_res_count.fill(0);
-}
-
-void MapMemoryRegion(VAddr base, u32 size, u8* target) {
+void MapMemoryRegion(PageTable& page_table, VAddr base, u32 size, u8* target) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: %08X", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: %08X", base);
- MapPages(base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
+ MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory);
}
-void MapIoRegion(VAddr base, u32 size, MMIORegionPointer mmio_handler) {
+void MapIoRegion(PageTable& page_table, VAddr base, u32 size, MMIORegionPointer mmio_handler) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: %08X", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: %08X", base);
- MapPages(base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
+ MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special);
- current_page_table->special_regions.emplace_back(SpecialRegion{base, size, mmio_handler});
+ page_table.special_regions.emplace_back(SpecialRegion{base, size, mmio_handler});
}
-void UnmapRegion(VAddr base, u32 size) {
+void UnmapRegion(PageTable& page_table, VAddr base, u32 size) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: %08X", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: %08X", base);
- MapPages(base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
+ MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped);
}
/**
@@ -183,7 +125,7 @@ T Read(const VAddr vaddr) {
}
// The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
- std::lock_guard<std::mutex> lock(HLE::g_hle_lock);
+ std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
@@ -224,7 +166,7 @@ void Write(const VAddr vaddr, const T data) {
}
// The memory access might do an MMIO or cached access, so we have to lock the HLE kernel state
- std::lock_guard<std::mutex> lock(HLE::g_hle_lock);
+ std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
switch (type) {
@@ -273,8 +215,7 @@ bool IsValidVirtualAddress(const VAddr vaddr) {
}
bool IsValidPhysicalAddress(const PAddr paddr) {
- boost::optional<VAddr> vaddr = PhysicalToVirtualAddress(paddr);
- return vaddr && IsValidVirtualAddress(*vaddr);
+ return GetPhysicalPointer(paddr) != nullptr;
}
u8* GetPointer(const VAddr vaddr) {
@@ -306,9 +247,63 @@ std::string ReadCString(VAddr vaddr, std::size_t max_length) {
}
u8* GetPhysicalPointer(PAddr address) {
- // TODO(Subv): This call should not go through the application's memory mapping.
- boost::optional<VAddr> vaddr = PhysicalToVirtualAddress(address);
- return vaddr ? GetPointer(*vaddr) : nullptr;
+ struct MemoryArea {
+ PAddr paddr_base;
+ u32 size;
+ };
+
+ static constexpr MemoryArea memory_areas[] = {
+ {VRAM_PADDR, VRAM_SIZE},
+ {IO_AREA_PADDR, IO_AREA_SIZE},
+ {DSP_RAM_PADDR, DSP_RAM_SIZE},
+ {FCRAM_PADDR, FCRAM_N3DS_SIZE},
+ {N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE},
+ };
+
+ const auto area =
+ std::find_if(std::begin(memory_areas), std::end(memory_areas), [&](const auto& area) {
+ return address >= area.paddr_base && address < area.paddr_base + area.size;
+ });
+
+ if (area == std::end(memory_areas)) {
+ LOG_ERROR(HW_Memory, "unknown GetPhysicalPointer @ 0x%08X", address);
+ return nullptr;
+ }
+
+ if (area->paddr_base == IO_AREA_PADDR) {
+ LOG_ERROR(HW_Memory, "MMIO mappings are not supported yet. phys_addr=0x%08X", address);
+ return nullptr;
+ }
+
+ u32 offset_into_region = address - area->paddr_base;
+
+ u8* target_pointer = nullptr;
+ switch (area->paddr_base) {
+ case VRAM_PADDR:
+ target_pointer = vram.data() + offset_into_region;
+ break;
+ case DSP_RAM_PADDR:
+ target_pointer = AudioCore::GetDspMemory().data() + offset_into_region;
+ break;
+ case FCRAM_PADDR:
+ for (const auto& region : Kernel::memory_regions) {
+ if (offset_into_region >= region.base &&
+ offset_into_region < region.base + region.size) {
+ target_pointer =
+ region.linear_heap_memory->data() + offset_into_region - region.base;
+ break;
+ }
+ }
+ ASSERT_MSG(target_pointer != nullptr, "Invalid FCRAM address");
+ break;
+ case N3DS_EXTRA_RAM_PADDR:
+ target_pointer = n3ds_extra_ram.data() + offset_into_region;
+ break;
+ default:
+ UNREACHABLE();
+ }
+
+ return target_pointer;
}
void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) {
diff --git a/src/core/memory.h b/src/core/memory.h
index c8c56babd..b228a48c2 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -7,8 +7,10 @@
#include <array>
#include <cstddef>
#include <string>
+#include <vector>
#include <boost/optional.hpp>
#include "common/common_types.h"
+#include "core/mmio.h"
namespace Memory {
@@ -21,6 +23,59 @@ const u32 PAGE_MASK = PAGE_SIZE - 1;
const int PAGE_BITS = 12;
const size_t PAGE_TABLE_NUM_ENTRIES = 1 << (32 - PAGE_BITS);
+enum class PageType {
+ /// Page is unmapped and should cause an access error.
+ Unmapped,
+ /// Page is mapped to regular memory. This is the only type you can get pointers to.
+ Memory,
+ /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
+ /// invalidation
+ RasterizerCachedMemory,
+ /// Page is mapped to an I/O region. Writing and reading to this page is handled by functions.
+ Special,
+ /// Page is mapped to an I/O region, but also needs to check for rasterizer cache flushing and
+ /// invalidation
+ RasterizerCachedSpecial,
+};
+
+struct SpecialRegion { // one MMIO-backed range, as recorded by MapIoRegion
+ VAddr base; // virtual address at which the region starts
+ u32 size; // size of the region in bytes
+ MMIORegionPointer handler; // MMIO handler that backs the region
+};
+
+/**
+ * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
+ * mimics the way a real CPU page table works, but instead is optimized for minimal decoding and
+ * fetching requirements when accessing. In the usual case of an access to regular memory, it only
+ * requires an indexed fetch and a check for NULL.
+ */
+struct PageTable {
+ /**
+ * Array of memory pointers backing each page. An entry can only be non-null if the
+ * corresponding entry in the `attributes` array is of type `Memory`.
+ */
+ std::array<u8*, PAGE_TABLE_NUM_ENTRIES> pointers;
+
+ /**
+ * Contains MMIO handlers that back memory regions whose entries in the `attributes` array are of
+ * type `Special`.
+ */
+ std::vector<SpecialRegion> special_regions;
+
+ /**
+ * Array of fine grained page attributes. If it is set to any value other than `Memory`, then
+ * the corresponding entry in `pointers` MUST be set to null.
+ */
+ std::array<PageType, PAGE_TABLE_NUM_ENTRIES> attributes;
+
+ /**
+ * Indicates the number of externally cached resources touching a page that should be
+ * flushed before the memory is accessed
+ */
+ std::array<u8, PAGE_TABLE_NUM_ENTRIES> cached_res_count;
+};
+
/// Physical memory regions as seen from the ARM11
enum : PAddr {
/// IO register area
@@ -126,6 +181,9 @@ enum : VAddr {
NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE,
};
+/// Currently active page table
+extern PageTable* current_page_table;
+
bool IsValidVirtualAddress(const VAddr addr);
bool IsValidPhysicalAddress(const PAddr addr);
@@ -169,8 +227,6 @@ boost::optional<VAddr> PhysicalToVirtualAddress(PAddr addr);
/**
* Gets a pointer to the memory region beginning at the specified physical address.
- *
- * @note This is currently implemented using PhysicalToVirtualAddress().
*/
u8* GetPhysicalPointer(PAddr address);
@@ -209,4 +265,4 @@ void RasterizerFlushVirtualRegion(VAddr start, u32 size, FlushMode mode);
* retrieve the current page table for that purpose.
*/
std::array<u8*, PAGE_TABLE_NUM_ENTRIES>* GetCurrentPageTablePointers();
-}
+} // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 3fdf3a87d..c58baa50b 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -9,24 +9,24 @@
namespace Memory {
-void InitMemoryMap();
-
/**
* Maps an allocated buffer onto a region of the emulated process address space.
*
+ * @param page_table The page table of the emulated process.
* @param base The address to start mapping at. Must be page-aligned.
* @param size The amount of bytes to map. Must be page-aligned.
* @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
*/
-void MapMemoryRegion(VAddr base, u32 size, u8* target);
+void MapMemoryRegion(PageTable& page_table, VAddr base, u32 size, u8* target);
/**
* Maps a region of the emulated process address space as a IO region.
+ * @param page_table The page table of the emulated process.
* @param base The address to start mapping at. Must be page-aligned.
* @param size The amount of bytes to map. Must be page-aligned.
* @param mmio_handler The handler that backs the mapping.
*/
-void MapIoRegion(VAddr base, u32 size, MMIORegionPointer mmio_handler);
+void MapIoRegion(PageTable& page_table, VAddr base, u32 size, MMIORegionPointer mmio_handler);
-void UnmapRegion(VAddr base, u32 size);
+void UnmapRegion(PageTable& page_table, VAddr base, u32 size);
}
diff --git a/src/core/settings.h b/src/core/settings.h
index bf8014c5a..024f14666 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -81,6 +81,7 @@ struct Values {
std::array<std::string, NativeButton::NumButtons> buttons;
std::array<std::string, NativeAnalog::NumAnalogs> analogs;
std::string motion_device;
+ std::string touch_device;
// Core
bool use_cpu_jit;
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 1df6c5677..8384ce744 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -3,20 +3,30 @@
// Refer to the license.txt file included.
#include "core/core.h"
+#include "core/memory.h"
#include "core/memory_setup.h"
#include "tests/core/arm/arm_test_common.h"
namespace ArmTests {
+static Memory::PageTable page_table;
+
TestEnvironment::TestEnvironment(bool mutable_memory_)
: mutable_memory(mutable_memory_), test_memory(std::make_shared<TestMemory>(this)) {
- Memory::MapIoRegion(0x00000000, 0x80000000, test_memory);
- Memory::MapIoRegion(0x80000000, 0x80000000, test_memory);
+
+ page_table.pointers.fill(nullptr);
+ page_table.attributes.fill(Memory::PageType::Unmapped);
+ page_table.cached_res_count.fill(0);
+
+ Memory::MapIoRegion(page_table, 0x00000000, 0x80000000, test_memory);
+ Memory::MapIoRegion(page_table, 0x80000000, 0x80000000, test_memory);
+
+ Memory::current_page_table = &page_table;
}
TestEnvironment::~TestEnvironment() {
- Memory::UnmapRegion(0x80000000, 0x80000000);
- Memory::UnmapRegion(0x00000000, 0x80000000);
+ Memory::UnmapRegion(page_table, 0x80000000, 0x80000000);
+ Memory::UnmapRegion(page_table, 0x00000000, 0x80000000);
}
void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index cffa4c952..82f47d8a9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
set(SRCS
command_processor.cpp
debug_utils/debug_utils.cpp
+ geometry_pipeline.cpp
pica.cpp
primitive_assembly.cpp
regs.cpp
@@ -29,6 +30,7 @@ set(SRCS
set(HEADERS
command_processor.h
debug_utils/debug_utils.h
+ geometry_pipeline.h
gpu_debugger.h
pica.h
pica_state.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index f98ca3302..fb65a3a0a 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
g_state.immediate.current_attribute = 0;
+ g_state.immediate.reset_geometry_pipeline = true;
default_attr_counter = 0;
break;
@@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
shader_engine->Run(g_state.vs, shader_unit);
shader_unit.WriteOutput(regs.vs, output);
- // Send to renderer
- using Pica::Shader::OutputVertex;
- auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
- const OutputVertex& v2) {
- VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
- };
-
- g_state.primitive_assembler.SubmitVertex(
- Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output),
- AddTriangle);
+ // Send to geometry pipeline
+ if (g_state.immediate.reset_geometry_pipeline) {
+ g_state.geometry_pipeline.Reconfigure();
+ g_state.immediate.reset_geometry_pipeline = false;
+ }
+ ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
+ g_state.geometry_pipeline.Setup(shader_engine);
+ g_state.geometry_pipeline.SubmitVertex(output);
}
}
}
@@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
const size_t VERTEX_CACHE_SIZE = 32;
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
- std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
- Shader::OutputVertex output_vertex;
+ std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
+ Shader::AttributeBuffer vs_output;
unsigned int vertex_cache_pos = 0;
vertex_cache_ids.fill(-1);
@@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+ g_state.geometry_pipeline.Reconfigure();
+ g_state.geometry_pipeline.Setup(shader_engine);
+ if (g_state.geometry_pipeline.NeedIndexInput())
+ ASSERT(is_indexed);
+
for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
// Indexed rendering doesn't use the start offset
unsigned int vertex =
@@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
bool vertex_cache_hit = false;
if (is_indexed) {
+ if (g_state.geometry_pipeline.NeedIndexInput()) {
+ g_state.geometry_pipeline.SubmitIndex(vertex);
+ continue;
+ }
+
if (g_debug_context && Pica::g_debug_context->recorder) {
int size = index_u16 ? 2 : 1;
memory_accesses.AddAccess(base_address + index_info.offset + size * index,
@@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
if (vertex == vertex_cache_ids[i]) {
- output_vertex = vertex_cache[i];
+ vs_output = vertex_cache[i];
vertex_cache_hit = true;
break;
}
@@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (!vertex_cache_hit) {
// Initialize data for the current vertex
- Shader::AttributeBuffer input, output{};
+ Shader::AttributeBuffer input;
loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
// Send to vertex shader
@@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
(void*)&input);
shader_unit.LoadInput(regs.vs, input);
shader_engine->Run(g_state.vs, shader_unit);
- shader_unit.WriteOutput(regs.vs, output);
-
- // Retrieve vertex from register data
- output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output);
+ shader_unit.WriteOutput(regs.vs, vs_output);
if (is_indexed) {
- vertex_cache[vertex_cache_pos] = output_vertex;
+ vertex_cache[vertex_cache_pos] = vs_output;
vertex_cache_ids[vertex_cache_pos] = vertex;
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
}
}
- // Send to renderer
- using Pica::Shader::OutputVertex;
- auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
- const OutputVertex& v2) {
- VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
- };
-
- primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
+ // Send to geometry pipeline
+ g_state.geometry_pipeline.SubmitVertex(vs_output);
}
for (auto& range : memory_accesses.ranges) {
diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp
new file mode 100644
index 000000000..b146e2ecb
--- /dev/null
+++ b/src/video_core/geometry_pipeline.cpp
@@ -0,0 +1,274 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/geometry_pipeline.h"
+#include "video_core/pica_state.h"
+#include "video_core/regs.h"
+#include "video_core/renderer_base.h"
+#include "video_core/video_core.h"
+
+namespace Pica {
+
+/// An attribute buffering interface, implemented once per geometry shader pipeline mode
+class GeometryPipelineBackend {
+public:
+ virtual ~GeometryPipelineBackend() = default;
+
+ /// Checks if there is no incomplete data transfer, i.e. no partially buffered input
+ virtual bool IsEmpty() const = 0;
+
+ /// Checks if the pipeline needs a direct input from index buffer
+ virtual bool NeedIndexInput() const = 0;
+
+ /// Submits an index from index buffer. Only valid while NeedIndexInput() returns true
+ virtual void SubmitIndex(unsigned int val) = 0;
+
+ /**
+ * Submits vertex attributes
+ * @param input attributes of a vertex output from vertex shader
+ * @return true if the buffer is full and the geometry shader should be invoked
+ */
+ virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0;
+};
+
+// Point mode: vertex shader outputs are written straight into the input registers of the geometry
+// shader unit. The number of vertex shader outputs and the number of geometry shader inputs are
+// both constant, and the geometry shader runs each time the input buffer has been filled. E.g.
+// with a geometry shader taking 6 inputs and a vertex shader producing 2 attributes, one geometry
+// shader invocation consumes 3 vertices.
+// TODO: what happens when the input size is not divisible by the output size?
+class GeometryPipeline_Point : public GeometryPipelineBackend {
+public:
+    GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) {
+        ASSERT(regs.pipeline.variable_primitive == 0);
+        ASSERT(regs.gs.input_to_uniform == 0);
+
+        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
+        const size_t gs_input_count = regs.gs.max_input_attribute_index + 1;
+        // The fill check in SubmitVertex only works when whole vertices fill the buffer exactly
+        ASSERT(gs_input_count % vs_output_num == 0);
+
+        Math::Vec4<float24>* const first_slot = attribute_buffer.attr;
+        buffer_cur = first_slot;
+        buffer_end = first_slot + gs_input_count;
+    }
+
+    /// True when the write cursor sits at the start of the buffer (nothing pending)
+    bool IsEmpty() const override {
+        return buffer_cur == attribute_buffer.attr;
+    }
+
+    /// This mode never consumes indices directly
+    bool NeedIndexInput() const override {
+        return false;
+    }
+
+    void SubmitIndex(unsigned int val) override {
+        UNREACHABLE();
+    }
+
+    /// Appends one vertex; once the buffer is full, loads it into the GS unit and returns true
+    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
+        buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
+        if (buffer_cur != buffer_end)
+            return false;
+
+        // Buffer complete: rewind for the next primitive and hand the inputs to the unit
+        buffer_cur = attribute_buffer.attr;
+        unit.LoadInput(regs.gs, attribute_buffer);
+        return true;
+    }
+
+private:
+    const Regs& regs;
+    Shader::GSUnitState& unit;
+    Shader::AttributeBuffer attribute_buffer;
+    Math::Vec4<float24>* buffer_cur;
+    Math::Vec4<float24>* buffer_end;
+    unsigned int vs_output_num;
+};
+
+// In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the
+// geometry shader unit. The number of vertices is variable and is specified by the first index
+// value in the batch. This mode is usually used for subdivision.
+class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend {
+public:
+    GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup)
+        : regs(regs), setup(setup) {
+        ASSERT(regs.pipeline.variable_primitive == 1);
+        ASSERT(regs.gs.input_to_uniform == 1);
+        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
+    }
+
+    /// The pipeline is idle exactly when it is waiting for the next vertex-count index
+    bool IsEmpty() const override {
+        return need_index;
+    }
+
+    bool NeedIndexInput() const override {
+        return need_index;
+    }
+
+    /**
+     * Starts a new primitive
+     * @param val the number of vertices that make up the primitive
+     */
+    void SubmitIndex(unsigned int val) override {
+        DEBUG_ASSERT(need_index);
+
+        // The number of vertex input is put to the uniform register
+        float24 vertex_num = float24::FromFloat32(val);
+        setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num);
+
+        // The second uniform register and so on are used for receiving input vertices
+        buffer_cur = setup.uniforms.f + 1;
+
+        main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1;
+        // NOTE(review): a count of 0 would make total_vertex_num wrap around in SubmitVertex -
+        // confirm whether such input can occur and what the hardware does with it
+        total_vertex_num = val;
+        need_index = false;
+    }
+
+    /**
+     * Buffers one vertex of the current primitive
+     * @param input attributes of a vertex output from vertex shader
+     * @return true once all vertices announced by SubmitIndex have been received
+     */
+    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
+        DEBUG_ASSERT(!need_index);
+        if (main_vertex_num != 0) {
+            // For main vertices, receive all attributes
+            buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
+            --main_vertex_num;
+        } else {
+            // For other vertices, only receive the first attribute (usually the position)
+            *(buffer_cur++) = input.attr[0];
+        }
+        --total_vertex_num;
+
+        if (total_vertex_num == 0) {
+            need_index = true;
+            return true;
+        }
+
+        return false;
+    }
+
+private:
+    bool need_index = true;
+    const Regs& regs;
+    Shader::ShaderSetup& setup;
+    // The three members below are only meaningful after SubmitIndex. They are given defined
+    // values so that a misuse (guarded only by DEBUG_ASSERT) never reads indeterminate data.
+    unsigned int main_vertex_num = 0;
+    unsigned int total_vertex_num = 0;
+    Math::Vec4<float24>* buffer_cur = nullptr;
+    unsigned int vs_output_num;
+};
+
+// FixedPrimitive mode: vertex attributes are collected in the uniform registers of the geometry
+// shader unit, and every shader invocation consumes the same, constant number of vertices. This is
+// usually used for particle system.
+class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend {
+public:
+    GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup)
+        : regs(regs), setup(setup) {
+        ASSERT(regs.pipeline.variable_primitive == 0);
+        ASSERT(regs.gs.input_to_uniform == 1);
+
+        vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1;
+        ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1);
+
+        // The destination window inside the uniforms starts at the configured start index and
+        // spans (attributes per vertex) * (vertices per invocation) registers
+        const size_t vertices_per_invocation =
+            regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1;
+        buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index;
+        buffer_cur = buffer_begin;
+        buffer_end = buffer_begin + vs_output_num * vertices_per_invocation;
+    }
+
+    /// True when the write cursor sits at the start of the window (nothing pending)
+    bool IsEmpty() const override {
+        return buffer_cur == buffer_begin;
+    }
+
+    /// This mode never consumes indices directly
+    bool NeedIndexInput() const override {
+        return false;
+    }
+
+    void SubmitIndex(unsigned int val) override {
+        UNREACHABLE();
+    }
+
+    /// Appends one vertex; returns true (and rewinds the cursor) once the window is full
+    bool SubmitVertex(const Shader::AttributeBuffer& input) override {
+        buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur);
+        if (buffer_cur != buffer_end)
+            return false;
+
+        buffer_cur = buffer_begin;
+        return true;
+    }
+
+private:
+    const Regs& regs;
+    Shader::ShaderSetup& setup;
+    Math::Vec4<float24>* buffer_begin;
+    Math::Vec4<float24>* buffer_cur;
+    Math::Vec4<float24>* buffer_end;
+    unsigned int vs_output_num;
+};
+
+GeometryPipeline::GeometryPipeline(State& state) : state(state) {} // backend starts out null
+
+GeometryPipeline::~GeometryPipeline() = default; // defined here, where the backend type is complete
+
+void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) {
+ this->vertex_handler = vertex_handler; // `this->` is required: the parameter shadows the member
+}
+
+void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) {
+    // Without a backend the geometry shader is not in use, so there is nothing to prepare
+    if (backend) {
+        this->shader_engine = shader_engine;
+        shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset);
+    }
+}
+
+void GeometryPipeline::Reconfigure() {
+    // Replacing the backend while it still holds buffered input is not supported
+    ASSERT(!backend || backend->IsEmpty());
+
+    const auto& pipeline = state.regs.pipeline;
+
+    if (pipeline.use_gs == PipelineRegs::UseGS::No) {
+        backend = nullptr;
+        return;
+    }
+
+    ASSERT(pipeline.use_gs == PipelineRegs::UseGS::Yes);
+
+    // The following assumes that when geometry shader is in use, the shader unit 3 is configured as
+    // a geometry shader unit.
+    // TODO: what happens if this is not true?
+    ASSERT(pipeline.gs_unit_exclusive_configuration == 1);
+    ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS);
+
+    state.gs_unit.ConfigOutput(state.regs.gs);
+
+    ASSERT(pipeline.vs_outmap_total_minus_1_a == pipeline.vs_outmap_total_minus_1_b);
+
+    // Pick the buffering strategy that matches the configured geometry shader mode
+    switch (pipeline.gs_config.mode) {
+    case PipelineRegs::GSMode::Point:
+        backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit);
+        break;
+    case PipelineRegs::GSMode::VariablePrimitive:
+        backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs);
+        break;
+    case PipelineRegs::GSMode::FixedPrimitive:
+        backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs);
+        break;
+    default:
+        UNREACHABLE();
+    }
+}
+
+bool GeometryPipeline::NeedIndexInput() const {
+    // With no backend (geometry shader disabled) indices are never consumed directly
+    return backend && backend->NeedIndexInput();
+}
+
+void GeometryPipeline::SubmitIndex(unsigned int val) {
+    // Callers must check NeedIndexInput() first; it returns false when there is no backend, so
+    // reaching this point with a null backend is a caller bug rather than a valid state.
+    ASSERT(backend != nullptr);
+    backend->SubmitIndex(val);
+}
+
+void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) {
+    if (!backend) {
+        // No backend means the geometry shader is disabled, so we send the vertex shader output
+        // directly to the primitive assembler.
+        vertex_handler(input);
+        return;
+    }
+
+    // The backend reports whether this vertex completed the geometry shader's input
+    if (!backend->SubmitVertex(input))
+        return;
+
+    shader_engine->Run(state.gs, state.gs_unit);
+
+    // The uniform b15 is set to true after every geometry shader invocation. This is useful
+    // for the shader to know if this is the first invocation in a batch, if the program set
+    // b15 to false first.
+    state.gs.uniforms.b[15] = true;
+}
+
+} // namespace Pica
diff --git a/src/video_core/geometry_pipeline.h b/src/video_core/geometry_pipeline.h
new file mode 100644
index 000000000..91fdd3192
--- /dev/null
+++ b/src/video_core/geometry_pipeline.h
@@ -0,0 +1,49 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include "video_core/shader/shader.h"
+
+namespace Pica {
+
+struct State;
+
+class GeometryPipelineBackend;
+
+/// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler
+class GeometryPipeline {
+public:
+    explicit GeometryPipeline(State& state);
+    ~GeometryPipeline();
+
+    /// Sets the handler for receiving vertex outputs from vertex shader
+    void SetVertexHandler(Shader::VertexHandler vertex_handler);
+
+    /**
+     * Setup the geometry shader unit if it is in use
+     * @param shader_engine the shader engine for the geometry shader to run
+     */
+    void Setup(Shader::ShaderEngine* shader_engine);
+
+    /// Reconfigures the pipeline according to current register settings
+    void Reconfigure();
+
+    /// Checks if the pipeline needs a direct input from index buffer
+    bool NeedIndexInput() const;
+
+    /// Submits an index from index buffer. Call this only when NeedIndexInput returns true
+    void SubmitIndex(unsigned int val);
+
+    /// Submits vertex attributes output from vertex shader
+    void SubmitVertex(const Shader::AttributeBuffer& input);
+
+private:
+    /// Receives vertices directly when no geometry shader backend is active
+    Shader::VertexHandler vertex_handler;
+    /// Engine that runs the geometry shader. Null until Setup() is called with an active backend,
+    /// so that a missing Setup() call fails deterministically instead of using a garbage pointer
+    Shader::ShaderEngine* shader_engine = nullptr;
+    /// Mode-specific buffering strategy; null while the geometry shader is disabled
+    std::unique_ptr<GeometryPipelineBackend> backend;
+    State& state;
+};
+} // namespace Pica
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index b95148a6a..218e06883 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -3,9 +3,11 @@
// Refer to the license.txt file included.
#include <cstring>
+#include "video_core/geometry_pipeline.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
-#include "video_core/regs_pipeline.h"
+#include "video_core/renderer_base.h"
+#include "video_core/video_core.h"
namespace Pica {
@@ -24,6 +26,23 @@ void Zero(T& o) {
memset(&o, 0, sizeof(o));
}
+/**
+ * Constructs the Pica state and connects the vertex handlers of the geometry shader unit and
+ * of the geometry pipeline to this state's primitive assembler.
+ *
+ * The handlers are installed on this object's own members rather than through the global
+ * `g_state`, so the `this` captured by the lambdas always refers to the State that owns the
+ * members the lambdas are attached to.
+ */
+State::State() : geometry_pipeline(*this) {
+    // Converts a shader output attribute buffer into an OutputVertex and feeds it to the
+    // primitive assembler; every assembled triangle is forwarded to the renderer's rasterizer.
+    auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
+        using Pica::Shader::OutputVertex;
+        // Only the global renderer is used in here, so nothing needs to be captured.
+        auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
+                              const OutputVertex& v2) {
+            VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
+        };
+        primitive_assembler.SubmitVertex(
+            Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle);
+    };
+
+    // Requests inverted vertex order for the next triangle built by the primitive assembler.
+    auto SetWinding = [this]() { primitive_assembler.SetWinding(); };
+
+    gs_unit.SetVertexHandler(SubmitVertex, SetWinding);
+    geometry_pipeline.SetVertexHandler(SubmitVertex);
+}
+
void State::Reset() {
Zero(regs);
Zero(vs);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 864a2c9e6..c6634a0bc 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -8,6 +8,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/vector_math.h"
+#include "video_core/geometry_pipeline.h"
#include "video_core/primitive_assembly.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
@@ -16,6 +17,7 @@ namespace Pica {
/// Struct used to describe current Pica state
struct State {
+ State();
void Reset();
/// Pica registers
@@ -137,8 +139,17 @@ struct State {
Shader::AttributeBuffer input_vertex;
// Index of the next attribute to be loaded into `input_vertex`.
u32 current_attribute = 0;
+ // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure
+ bool reset_geometry_pipeline = true;
} immediate;
+ // The geometry shader needs to be kept in the global state because some shaders rely on
+ // preserved register values across shader invocations.
+ // TODO: also bring the three vertex shader units here and implement the shader scheduler.
+ Shader::GSUnitState gs_unit;
+
+ GeometryPipeline geometry_pipeline;
+
// This is constructed with a dummy triangle topology
PrimitiveAssembler<Shader::OutputVertex> primitive_assembler;
};
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index acd2ac5e2..9c3dd4cab 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -17,15 +17,18 @@ template <typename VertexType>
void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
TriangleHandler triangle_handler) {
switch (topology) {
- // TODO: Figure out what's different with TriangleTopology::Shader.
case PipelineRegs::TriangleTopology::List:
case PipelineRegs::TriangleTopology::Shader:
if (buffer_index < 2) {
buffer[buffer_index++] = vtx;
} else {
buffer_index = 0;
-
- triangle_handler(buffer[0], buffer[1], vtx);
+ if (topology == PipelineRegs::TriangleTopology::Shader && winding) {
+ triangle_handler(buffer[1], buffer[0], vtx);
+ winding = false;
+ } else {
+ triangle_handler(buffer[0], buffer[1], vtx);
+ }
}
break;
@@ -51,9 +54,15 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx,
}
template <typename VertexType>
+void PrimitiveAssembler<VertexType>::SetWinding() {
+ winding = true;
+}
+
+template <typename VertexType>
void PrimitiveAssembler<VertexType>::Reset() {
buffer_index = 0;
strip_ready = false;
+ winding = false;
}
template <typename VertexType>
diff --git a/src/video_core/primitive_assembly.h b/src/video_core/primitive_assembly.h
index e8eccdf27..12de8e3b9 100644
--- a/src/video_core/primitive_assembly.h
+++ b/src/video_core/primitive_assembly.h
@@ -30,6 +30,12 @@ struct PrimitiveAssembler {
void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler);
/**
+ * Invert the vertex order of the next triangle. Called by geometry shader emitter.
+ * This only takes effect for TriangleTopology::Shader.
+ */
+ void SetWinding();
+
+ /**
* Resets the internal state of the PrimitiveAssembler.
*/
void Reset();
@@ -45,6 +51,7 @@ private:
int buffer_index;
VertexType buffer[2];
bool strip_ready = false;
+ bool winding = false;
};
} // namespace
diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h
index 8b6369297..e78c3e331 100644
--- a/src/video_core/regs_pipeline.h
+++ b/src/video_core/regs_pipeline.h
@@ -147,7 +147,15 @@ struct PipelineRegs {
// Number of vertices to render
u32 num_vertices;
- INSERT_PADDING_WORDS(0x1);
+ enum class UseGS : u32 {
+ No = 0,
+ Yes = 2,
+ };
+
+ union {
+ BitField<0, 2, UseGS> use_gs;
+ BitField<31, 1, u32> variable_primitive;
+ };
// The index of the first vertex to render
u32 vertex_offset;
@@ -218,7 +226,29 @@ struct PipelineRegs {
GPUMode gpu_mode;
- INSERT_PADDING_WORDS(0x18);
+ INSERT_PADDING_WORDS(0x4);
+ BitField<0, 4, u32> vs_outmap_total_minus_1_a;
+ INSERT_PADDING_WORDS(0x6);
+ BitField<0, 4, u32> vs_outmap_total_minus_1_b;
+
+ enum class GSMode : u32 {
+ Point = 0,
+ VariablePrimitive = 1,
+ FixedPrimitive = 2,
+ };
+
+ union {
+ BitField<0, 8, GSMode> mode;
+ BitField<8, 4, u32> fixed_vertex_num_minus_1;
+ BitField<12, 4, u32> stride_minus_1;
+ BitField<16, 4, u32> start_index;
+ } gs_config;
+
+ INSERT_PADDING_WORDS(0x1);
+
+ u32 variable_vertex_main_num_minus_1;
+
+ INSERT_PADDING_WORDS(0x9);
enum class TriangleTopology : u32 {
List = 0,
diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h
index 2874fd127..4fef00d76 100644
--- a/src/video_core/regs_rasterizer.h
+++ b/src/video_core/regs_rasterizer.h
@@ -5,10 +5,10 @@
#pragma once
#include <array>
-
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
+#include "video_core/pica_types.h"
namespace Pica {
@@ -31,7 +31,17 @@ struct RasterizerRegs {
BitField<0, 24, u32> viewport_size_y;
- INSERT_PADDING_WORDS(0x9);
+ INSERT_PADDING_WORDS(0x3);
+
+ BitField<0, 1, u32> clip_enable;
+ BitField<0, 24, u32> clip_coef[4]; // float24
+
+ /// Returns the four clip plane coefficients decoded from their raw float24 register values.
+ Math::Vec4<float24> GetClipCoef() const {
+     const float24 coef_x = float24::FromRaw(clip_coef[0]);
+     const float24 coef_y = float24::FromRaw(clip_coef[1]);
+     const float24 coef_z = float24::FromRaw(clip_coef[2]);
+     const float24 coef_w = float24::FromRaw(clip_coef[3]);
+     return {coef_x, coef_y, coef_z, coef_w};
+ }
+
+ INSERT_PADDING_WORDS(0x1);
BitField<0, 24, u32> viewport_depth_range; // float24
BitField<0, 24, u32> viewport_depth_near_plane; // float24
diff --git a/src/video_core/regs_shader.h b/src/video_core/regs_shader.h
index ddb1ee451..c15d4d162 100644
--- a/src/video_core/regs_shader.h
+++ b/src/video_core/regs_shader.h
@@ -24,9 +24,16 @@ struct ShaderRegs {
INSERT_PADDING_WORDS(0x4);
+ enum ShaderMode {
+ GS = 0x08,
+ VS = 0xA0,
+ };
+
union {
// Number of input attributes to shader unit - 1
BitField<0, 4, u32> max_input_attribute_index;
+ BitField<8, 8, u32> input_to_uniform;
+ BitField<24, 8, ShaderMode> shader_mode;
};
// Offset to shader program entry point (in words)
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index aa95ef21d..7b0cd1b66 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -169,6 +169,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle);
// Sync fixed function OpenGL state
+ SyncClipEnabled();
+ SyncClipCoef();
SyncCullMode();
SyncBlendEnabled();
SyncBlendFuncs();
@@ -401,6 +403,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncCullMode();
break;
+ // Clipping plane
+ case PICA_REG_INDEX(rasterizer.clip_enable):
+ SyncClipEnabled();
+ break;
+
+ case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[0], 0x48):
+ case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[1], 0x49):
+ case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[2], 0x4a):
+ case PICA_REG_INDEX_WORKAROUND(rasterizer.clip_coef[3], 0x4b):
+ SyncClipCoef();
+ break;
+
// Depth modifiers
case PICA_REG_INDEX(rasterizer.viewport_depth_range):
SyncDepthScale();
@@ -1280,6 +1294,20 @@ void RasterizerOpenGL::SetShader() {
}
}
+/// Copies the PICA clip_enable register (as a boolean) into state.clip_distance[1].
+void RasterizerOpenGL::SyncClipEnabled() {
+    const auto& rasterizer_regs = Pica::g_state.regs.rasterizer;
+    const bool clip_enabled = rasterizer_regs.clip_enable != 0;
+    state.clip_distance[1] = clip_enabled;
+}
+
+/// Converts the PICA clip coefficients to 32-bit floats and, if they differ from the values
+/// currently held in the uniform block data, stores them and marks the block dirty.
+void RasterizerOpenGL::SyncClipCoef() {
+    const auto coef24 = Pica::g_state.regs.rasterizer.GetClipCoef();
+    const GLvec4 coef32 = {
+        coef24.x.ToFloat32(),
+        coef24.y.ToFloat32(),
+        coef24.z.ToFloat32(),
+        coef24.w.ToFloat32(),
+    };
+
+    auto& uniforms = uniform_block_data;
+    const bool changed = coef32 != uniforms.data.clip_coef;
+    if (changed) {
+        uniforms.data.clip_coef = coef32;
+        uniforms.dirty = true;
+    }
+}
+
void RasterizerOpenGL::SyncCullMode() {
const auto& regs = Pica::g_state.regs;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 78e218efe..46c62961c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -151,14 +151,21 @@ private:
LightSrc light_src[8];
alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
alignas(16) GLvec4 tev_combiner_buffer_color;
+ alignas(16) GLvec4 clip_coef;
};
static_assert(
- sizeof(UniformData) == 0x460,
+ sizeof(UniformData) == 0x470,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
+ /// Syncs the clip enabled status to match the PICA register
+ void SyncClipEnabled();
+
+ /// Syncs the clip coefficients to match the PICA register
+ void SyncClipCoef();
+
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 015e69da9..9fe183944 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/logging/log.h"
+#include "core/core.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
@@ -24,6 +25,42 @@ using TevStageConfig = TexturingRegs::TevStageConfig;
namespace GLShader {
+/// GLSL text declaring the `shader_data` uniform block (and the LightSrc struct and constants
+/// it uses). It is appended to both the generated fragment shader and the generated vertex
+/// shader sources. NOTE(review): the block presumably has to stay in sync with the C++
+/// UniformData structure in gl_rasterizer.h (its static_assert asks to update the shader
+/// structure when the size changes) -- confirm when adding fields.
+static const std::string UniformBlockDef = R"(
+#define NUM_TEV_STAGES 6
+#define NUM_LIGHTS 8
+
+struct LightSrc {
+ vec3 specular_0;
+ vec3 specular_1;
+ vec3 diffuse;
+ vec3 ambient;
+ vec3 position;
+ vec3 spot_direction;
+ float dist_atten_bias;
+ float dist_atten_scale;
+};
+
+layout (std140) uniform shader_data {
+ vec2 framebuffer_scale;
+ int alphatest_ref;
+ float depth_scale;
+ float depth_offset;
+ int scissor_x1;
+ int scissor_y1;
+ int scissor_x2;
+ int scissor_y2;
+ vec3 fog_color;
+ vec2 proctex_noise_f;
+ vec2 proctex_noise_a;
+ vec2 proctex_noise_p;
+ vec3 lighting_global_ambient;
+ LightSrc light_src[NUM_LIGHTS];
+ vec4 const_color[NUM_TEV_STAGES];
+ vec4 tev_combiner_buffer_color;
+ vec4 clip_coef;
+};
+)";
+
PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) {
PicaShaderConfig res;
@@ -594,8 +631,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
// Note: even if the normal vector is modified by normal map, which is not the
// normal of the tangent plane anymore, the half angle vector is still projected
// using the modified normal vector.
- std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, "
- "normal) * dot(normal, normalize(half_vector))";
+ std::string half_angle_proj =
+ "normalize(half_vector) - normal * dot(normal, normalize(half_vector))";
// Note: the half angle vector projection is confirmed not normalized before the dot
// product. The result is in fact not cos(phi) as the name suggested.
index = "dot(" + half_angle_proj + ", tangent)";
@@ -750,7 +787,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
}
// Fresnel
- if (lighting.lut_fr.enable &&
+ // Note: only the last entry in the light slots applies the Fresnel factor
+ if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::Fresnel)) {
// Lookup fresnel LUT value
@@ -759,17 +797,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
lighting.lut_fr.type, lighting.lut_fr.abs_input);
value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")";
- // Enabled for difffuse lighting alpha component
+ // Enabled for diffuse lighting alpha component
if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha ||
lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
- out += "diffuse_sum.a *= " + value + ";\n";
+ out += "diffuse_sum.a = " + value + ";\n";
}
// Enabled for the specular lighting alpha component
if (lighting.fresnel_selector ==
LightingRegs::LightingFresnelSelector::SecondaryAlpha ||
lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
- out += "specular_sum.a *= " + value + ";\n";
+ out += "specular_sum.a = " + value + ";\n";
}
}
@@ -1008,8 +1046,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
std::string out = R"(
#version 330 core
-#define NUM_TEV_STAGES 6
-#define NUM_LIGHTS 8
in vec4 primary_color;
in vec2 texcoord[3];
@@ -1021,36 +1057,6 @@ in vec4 gl_FragCoord;
out vec4 color;
-struct LightSrc {
- vec3 specular_0;
- vec3 specular_1;
- vec3 diffuse;
- vec3 ambient;
- vec3 position;
- vec3 spot_direction;
- float dist_atten_bias;
- float dist_atten_scale;
-};
-
-layout (std140) uniform shader_data {
- vec2 framebuffer_scale;
- int alphatest_ref;
- float depth_scale;
- float depth_offset;
- int scissor_x1;
- int scissor_y1;
- int scissor_x2;
- int scissor_y2;
- vec3 fog_color;
- vec2 proctex_noise_f;
- vec2 proctex_noise_a;
- vec2 proctex_noise_p;
- vec3 lighting_global_ambient;
- LightSrc light_src[NUM_LIGHTS];
- vec4 const_color[NUM_TEV_STAGES];
- vec4 tev_combiner_buffer_color;
-};
-
uniform sampler2D tex[3];
uniform samplerBuffer lighting_lut;
uniform samplerBuffer fog_lut;
@@ -1059,7 +1065,11 @@ uniform samplerBuffer proctex_color_map;
uniform samplerBuffer proctex_alpha_map;
uniform samplerBuffer proctex_lut;
uniform samplerBuffer proctex_diff_lut;
+)";
+
+ out += UniformBlockDef;
+ out += R"(
// Rotate the vector v by the quaternion q
vec3 quaternion_rotate(vec4 q, vec3 v) {
return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
@@ -1155,6 +1165,11 @@ vec4 secondary_fragment_color = vec4(0.0);
// Blend the fog
out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n";
+ } else if (state.fog_mode == TexturingRegs::FogMode::Gas) {
+ Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_Pica_UseGasMode",
+ true);
+ LOG_CRITICAL(Render_OpenGL, "Unimplemented gas mode");
+ UNIMPLEMENTED();
}
out += "gl_FragDepth = depth;\n";
@@ -1190,6 +1205,12 @@ out float texcoord0_w;
out vec4 normquat;
out vec3 view;
+)";
+
+ out += UniformBlockDef;
+
+ out += R"(
+
void main() {
primary_color = vert_color;
texcoord[0] = vert_texcoord0;
@@ -1200,7 +1221,7 @@ void main() {
view = vert_view;
gl_Position = vert_position;
gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
- // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane
+ gl_ClipDistance[1] = dot(clip_coef, vert_position);
}
)";
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 67ed19ba8..e9063e616 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -21,7 +21,8 @@ namespace Pica {
namespace Shader {
-OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) {
+OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs,
+ const AttributeBuffer& input) {
// Setup output data
union {
OutputVertex ret{};
@@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) {
}
}
+UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {}
+
+/// Allocates the handler storage and gives every scalar field a defined initial value, so that
+/// Emit never reads indeterminate data if it runs before SETEMIT or ConfigOutput has written
+/// these fields.
+GSEmitter::GSEmitter()
+    : vertex_id(0), prim_emit(false), winding(false), output_mask(0), handlers(new Handlers) {}
+
+/// Releases the handler storage allocated by the constructor.
+GSEmitter::~GSEmitter() {
+    delete handlers;
+}
+
+/**
+ * Stores the given output registers into the buffer slot selected by `vertex_id`. When
+ * `prim_emit` is set, it additionally signals the winding setter (if `winding` is set) and
+ * passes all three buffered vertices, filtered through `output_mask`, to the vertex handler.
+ * @param vertex the 16 shader output registers of the vertex being emitted
+ */
+void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) {
+    ASSERT(vertex_id < 3);
+    auto& slot = buffer[vertex_id];
+    std::copy(std::begin(vertex), std::end(vertex), slot.begin());
+
+    // Without the primitive-emit flag the vertex is only buffered.
+    if (!prim_emit) {
+        return;
+    }
+
+    if (winding) {
+        handlers->winding_setter();
+    }
+
+    for (const auto& buffered_vertex : buffer) {
+        AttributeBuffer output;
+        unsigned int next_attr = 0;
+        // Pack the registers enabled in output_mask contiguously, in ascending register order.
+        for (unsigned int reg : Common::BitSet<u32>(output_mask)) {
+            output.attr[next_attr] = buffered_vertex[reg];
+            ++next_attr;
+        }
+        handlers->vertex_handler(output);
+    }
+}
+
+GSUnitState::GSUnitState() : UnitState(&emitter) {}
+
+/// Stores the two callbacks in the emitter's handler storage; Emit invokes them when a
+/// primitive is emitted.
+void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) {
+    GSEmitter::Handlers& callbacks = *emitter.handlers;
+    callbacks.vertex_handler = std::move(vertex_handler);
+    callbacks.winding_setter = std::move(winding_setter);
+}
+
+void GSUnitState::ConfigOutput(const ShaderRegs& config) {
+ emitter.output_mask = config.output_mask;
+}
+
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
#ifdef ARCHITECTURE_x86_64
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index e156f6aef..a3789da01 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
+#include <functional>
#include <type_traits>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
@@ -31,6 +32,12 @@ struct AttributeBuffer {
alignas(16) Math::Vec4<float24> attr[16];
};
+/// Handler type for receiving vertex outputs from vertex shader or geometry shader
+using VertexHandler = std::function<void(const AttributeBuffer&)>;
+
+/// Handler type for signaling to invert the vertex order of the next triangle
+using WindingSetter = std::function<void()>;
+
struct OutputVertex {
Math::Vec4<float24> pos;
Math::Vec4<float24> quat;
@@ -43,7 +50,8 @@ struct OutputVertex {
INSERT_PADDING_WORDS(1);
Math::Vec2<float24> tc2;
- static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output);
+ static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs,
+ const AttributeBuffer& output);
};
#define ASSERT_POS(var, pos) \
static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong " \
@@ -61,12 +69,36 @@ static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size");
/**
+ * This structure contains state information for primitive emitting in geometry shader.
+ */
+struct GSEmitter {
+ // Storage for three vertices of 16 output registers each; Emit copies the current output
+ // registers into the slot selected by vertex_id.
+ std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer;
+ // Buffer slot the next Emit writes to; written by the SETEMIT instruction (must be < 3).
+ u8 vertex_id;
+ // When set, Emit hands all three buffered vertices to the vertex handler; written by SETEMIT.
+ bool prim_emit;
+ // When set, Emit calls the winding setter before handing out vertices; written by SETEMIT.
+ bool winding;
+ // Bit mask of the output registers that Emit forwards; written by GSUnitState::ConfigOutput.
+ u32 output_mask;
+
+ // Function objects are hidden behind a raw pointer to make the structure standard layout type,
+ // for JIT to use offsetof to access other members.
+ // NOTE(review): `handlers` is allocated in the constructor and deleted in the destructor, but
+ // no copy operations are declared; copying a GSEmitter would presumably delete the same
+ // pointer twice -- confirm that no caller copies this struct.
+ struct Handlers {
+ VertexHandler vertex_handler;
+ WindingSetter winding_setter;
+ } * handlers;
+
+ GSEmitter();
+ ~GSEmitter();
+ /// Buffers one vertex and, if prim_emit is set, outputs the three buffered vertices.
+ void Emit(Math::Vec4<float24> (&vertex)[16]);
+};
+static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type");
+
+/**
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At the present, Citra only implements a
* single shader unit that processes all shaders serially. Putting the state information in a struct
* here will make it easier for us to parallelize the shader processing later.
*/
struct UnitState {
+ explicit UnitState(GSEmitter* emitter = nullptr);
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned.
@@ -82,6 +114,8 @@ struct UnitState {
// TODO: How many bits do these actually have?
s32 address_registers[3];
+ GSEmitter* emitter_ptr;
+
static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Input:
@@ -125,6 +159,19 @@ struct UnitState {
void WriteOutput(const ShaderRegs& config, AttributeBuffer& output);
};
+/**
+ * This is an extended shader unit state that represents the special unit that can run both vertex
+ * shader and geometry shader. It contains an additional primitive emitter and utilities for
+ * geometry shader.
+ */
+struct GSUnitState : public UnitState {
+ /// Constructs the base UnitState with a pointer to this object's own `emitter`.
+ GSUnitState();
+ /// Stores the vertex handler and winding setter in the emitter's handler storage.
+ void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter);
+ /// Copies `config.output_mask` into the emitter.
+ void ConfigOutput(const ShaderRegs& config);
+
+ // Primitive emitter reached by the shader through UnitState::emitter_ptr.
+ GSEmitter emitter;
+};
+
struct ShaderSetup {
struct {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 206c0978a..9d4da4904 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
break;
}
+ case OpCode::Id::EMIT: {
+ GSEmitter* emitter = state.emitter_ptr;
+ ASSERT_MSG(emitter, "Execute EMIT on VS");
+ emitter->Emit(state.registers.output);
+ break;
+ }
+
+ case OpCode::Id::SETEMIT: {
+ GSEmitter* emitter = state.emitter_ptr;
+ ASSERT_MSG(emitter, "Execute SETEMIT on VS");
+ emitter->vertex_id = instr.setemit.vertex_id;
+ emitter->prim_emit = instr.setemit.prim_emit != 0;
+ emitter->winding = instr.setemit.winding != 0;
+ break;
+ }
+
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(),
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 42a57aab1..1b31623bd 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -75,8 +75,8 @@ const JitFunction instr_table[64] = {
&JitShader::Compile_IF, // ifu
&JitShader::Compile_IF, // ifc
&JitShader::Compile_LOOP, // loop
- nullptr, // emit
- nullptr, // sete
+ &JitShader::Compile_EMIT, // emit
+ &JitShader::Compile_SETE, // sete
&JitShader::Compile_JMP, // jmpc
&JitShader::Compile_JMP, // jmpu
&JitShader::Compile_CMP, // cmp
@@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) {
}
}
+// Free-function trampoline that JIT-generated code calls (see Compile_EMIT); it receives the
+// output register array by pointer and forwards it by reference to GSEmitter::Emit.
+static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) {
+ emitter->Emit(*output);
+}
+
+// Generates code for the EMIT instruction: if the unit state has an emitter, call the Emit
+// trampoline with the emitter and the unit's output registers; otherwise log a critical
+// message and continue.
+void JitShader::Compile_EMIT(Instruction instr) {
+ Label have_emitter, end;
+ // rax = state->emitter_ptr; a null pointer means this unit has no emitter
+ mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
+ test(rax, rax);
+ jnz(have_emitter);
+
+ // No emitter: report the error through LogCritical, then skip the emit
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS"));
+ CallFarFunction(*this, LogCritical);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ jmp(end);
+
+ L(have_emitter);
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ // First argument: the emitter pointer loaded into rax above
+ mov(ABI_PARAM1, rax);
+ // Second argument: address of the output registers inside the unit state
+ mov(ABI_PARAM2, STATE);
+ add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output)));
+ CallFarFunction(*this, Emit);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ L(end);
+}
+
+// Generates code for the SETEMIT instruction: if the unit state has an emitter, store the
+// instruction's vertex_id, prim_emit and winding fields into it; otherwise log a critical
+// message and continue.
+void JitShader::Compile_SETE(Instruction instr) {
+ Label have_emitter, end;
+ // rax = state->emitter_ptr; a null pointer means this unit has no emitter
+ mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]);
+ test(rax, rax);
+ jnz(have_emitter);
+
+ // No emitter: report the error through LogCritical, then skip the stores
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS"));
+ CallFarFunction(*this, LogCritical);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ jmp(end);
+
+ L(have_emitter);
+ // The instruction fields are known at compile time, so they are written as byte immediates
+ // directly into the GSEmitter members located via offsetof.
+ mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id);
+ mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit);
+ mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding);
+ L(end);
+}
+
void JitShader::Compile_Block(unsigned end) {
while (program_counter < end) {
Compile_NextInstr();
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
index 31af0ca48..4aee56b1d 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.h
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -66,6 +66,8 @@ public:
void Compile_JMP(Instruction instr);
void Compile_CMP(Instruction instr);
void Compile_MAD(Instruction instr);
+ void Compile_EMIT(Instruction instr);
+ void Compile_SETE(Instruction instr);
private:
void Compile_Block(unsigned end);
diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp
index cdbc71502..a52129eb7 100644
--- a/src/video_core/swrasterizer/clipper.cpp
+++ b/src/video_core/swrasterizer/clipper.cpp
@@ -31,7 +31,7 @@ public:
: coeffs(coeffs), bias(bias) {}
bool IsInside(const Vertex& vertex) const {
- return Math::Dot(vertex.pos + bias, coeffs) <= float24::FromFloat32(0);
+ return Math::Dot(vertex.pos + bias, coeffs) >= float24::FromFloat32(0);
}
bool IsOutSide(const Vertex& vertex) const {
@@ -116,19 +116,18 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
static const float24 f0 = float24::FromFloat32(0.0);
static const float24 f1 = float24::FromFloat32(1.0);
static const std::array<ClippingEdge, 7> clipping_edges = {{
- {Math::MakeVec(f1, f0, f0, -f1)}, // x = +w
- {Math::MakeVec(-f1, f0, f0, -f1)}, // x = -w
- {Math::MakeVec(f0, f1, f0, -f1)}, // y = +w
- {Math::MakeVec(f0, -f1, f0, -f1)}, // y = -w
- {Math::MakeVec(f0, f0, f1, f0)}, // z = 0
- {Math::MakeVec(f0, f0, -f1, -f1)}, // z = -w
- {Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON
+ {Math::MakeVec(-f1, f0, f0, f1)}, // x = +w
+ {Math::MakeVec(f1, f0, f0, f1)}, // x = -w
+ {Math::MakeVec(f0, -f1, f0, f1)}, // y = +w
+ {Math::MakeVec(f0, f1, f0, f1)}, // y = -w
+ {Math::MakeVec(f0, f0, -f1, f0)}, // z = 0
+ {Math::MakeVec(f0, f0, f1, f1)}, // z = -w
+ {Math::MakeVec(f0, f0, f0, f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON
}};
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
- for (auto edge : clipping_edges) {
-
+ auto Clip = [&](const ClippingEdge& edge) {
std::swap(input_list, output_list);
output_list->clear();
@@ -147,12 +146,24 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
}
reference_vertex = &vertex;
}
+ };
+
+ for (auto edge : clipping_edges) {
+ Clip(edge);
// Need to have at least a full triangle to continue...
if (output_list->size() < 3)
return;
}
+ if (g_state.regs.rasterizer.clip_enable) {
+ ClippingEdge custom_edge{g_state.regs.rasterizer.GetClipCoef()};
+ Clip(custom_edge);
+
+ if (output_list->size() < 3)
+ return;
+ }
+
InitScreenCoordinates((*output_list)[0]);
InitScreenCoordinates((*output_list)[1]);
diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp
index 39a3e396d..5fa748611 100644
--- a/src/video_core/swrasterizer/lighting.cpp
+++ b/src/video_core/swrasterizer/lighting.cpp
@@ -22,18 +22,37 @@ static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut
std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
- const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view) {
+ const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view,
+ const Math::Vec4<u8> (&texture_color)[4]) {
- // TODO(Subv): Bump mapping
- Math::Vec3<float> surface_normal = {0.0f, 0.0f, 1.0f};
+ Math::Vec3<float> surface_normal;
+ Math::Vec3<float> surface_tangent;
if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) {
- LOG_CRITICAL(HW_GPU, "unimplemented bump mapping");
- UNIMPLEMENTED();
+ Math::Vec3<float> perturbation =
+ texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f -
+ Math::MakeVec(1.0f, 1.0f, 1.0f);
+ if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
+ if (!lighting.config0.disable_bump_renorm) {
+ const float z_square = 1 - perturbation.xy().Length2();
+ perturbation.z = std::sqrt(std::max(z_square, 0.0f));
+ }
+ surface_normal = perturbation;
+ surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f);
+ } else if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
+ surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f);
+ surface_tangent = perturbation;
+ } else {
+ LOG_ERROR(HW_GPU, "Unknown bump mode %u", lighting.config0.bump_mode.Value());
+ }
+ } else {
+ surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f);
+ surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f);
}
// Use the normalized the quaternion when performing the rotation
auto normal = Math::QuaternionRotate(normquat, surface_normal);
+ auto tangent = Math::QuaternionRotate(normquat, surface_tangent);
Math::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f};
Math::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f};
@@ -102,6 +121,16 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
result = Math::Dot(light_vector, spot_dir.Cast<float>() / 2047.0f);
break;
}
+ case LightingRegs::LightingLutInput::CP:
+ if (lighting.config0.config == LightingRegs::LightingConfig::Config7) {
+ const Math::Vec3<float> norm_half_vector = half_vector.Normalized();
+ const Math::Vec3<float> half_vector_proj =
+ norm_half_vector - normal * Math::Dot(normal, norm_half_vector);
+ result = Math::Dot(half_vector_proj, tangent);
+ } else {
+ result = 0.0f;
+ }
+ break;
default:
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast<u32>(input));
UNIMPLEMENTED();
@@ -201,7 +230,8 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
d1_lut_value * refl_value * light_config.specular_1.ToVec3f();
// Fresnel
- if (lighting.config1.disable_lut_fr == 0 &&
+ // Note: only the last entry in the light slots applies the Fresnel factor
+ if (light_index == lighting.max_light_index && lighting.config1.disable_lut_fr == 0 &&
LightingRegs::IsLightingSamplerSupported(lighting.config0.config,
LightingRegs::LightingSampler::Fresnel)) {
@@ -213,14 +243,14 @@ std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
if (lighting.config0.fresnel_selector ==
LightingRegs::LightingFresnelSelector::PrimaryAlpha ||
lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
- diffuse_sum.a() *= lut_value;
+ diffuse_sum.a() = lut_value;
}
// Enabled for the specular lighting alpha component
if (lighting.config0.fresnel_selector ==
LightingRegs::LightingFresnelSelector::SecondaryAlpha ||
lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
- specular_sum.a() *= lut_value;
+ specular_sum.a() = lut_value;
}
}
diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h
index 438dca926..d807a3d94 100644
--- a/src/video_core/swrasterizer/lighting.h
+++ b/src/video_core/swrasterizer/lighting.h
@@ -13,6 +13,7 @@ namespace Pica {
std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
- const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view);
+ const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view,
+ const Math::Vec4<u8> (&texture_color)[4]);
} // namespace Pica
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index fdc1df199..862135614 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -437,8 +437,8 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
};
- std::tie(primary_fragment_color, secondary_fragment_color) =
- ComputeFragmentsColors(g_state.regs.lighting, g_state.lighting, normquat, view);
+ std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(
+ g_state.regs.lighting, g_state.lighting, normquat, view, texture_color);
}
for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();