summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/citra/citra.cpp7
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/citra_qt/bootmanager.cpp5
-rw-r--r--src/citra_qt/debugger/graphics.cpp4
-rw-r--r--src/citra_qt/debugger/graphics_cmdlists.cpp6
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp4
-rw-r--r--src/citra_qt/debugger/profiler.cpp202
-rw-r--r--src/citra_qt/debugger/profiler.h17
-rw-r--r--src/citra_qt/main.cpp40
-rw-r--r--src/citra_qt/main.h20
-rw-r--r--src/citra_qt/util/util.cpp13
-rw-r--r--src/citra_qt/util/util.h10
-rw-r--r--src/common/CMakeLists.txt3
-rw-r--r--src/common/common_funcs.h12
-rw-r--r--src/common/file_util.h2
-rw-r--r--src/common/logging/log.h15
-rw-r--r--src/common/microprofile.cpp7
-rw-r--r--src/common/microprofile.h25
-rw-r--r--src/common/microprofileui.h16
-rw-r--r--src/common/x64/emitter.cpp770
-rw-r--r--src/common/x64/emitter.h850
-rw-r--r--src/core/CMakeLists.txt4
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp103
-rw-r--r--src/core/arm/skyeye_common/armstate.cpp1
-rw-r--r--src/core/arm/skyeye_common/armsupp.cpp1
-rw-r--r--src/core/hle/config_mem.cpp7
-rw-r--r--src/core/hle/config_mem.h1
-rw-r--r--src/core/hle/function_wrappers.h8
-rw-r--r--src/core/hle/hle.cpp4
-rw-r--r--src/core/hle/kernel/kernel.cpp19
-rw-r--r--src/core/hle/kernel/memory.cpp136
-rw-r--r--src/core/hle/kernel/memory.h35
-rw-r--r--src/core/hle/kernel/process.cpp160
-rw-r--r--src/core/hle/kernel/process.h39
-rw-r--r--src/core/hle/kernel/resource_limit.cpp1
-rw-r--r--src/core/hle/kernel/shared_memory.cpp27
-rw-r--r--src/core/hle/kernel/shared_memory.h2
-rw-r--r--src/core/hle/kernel/thread.cpp4
-rw-r--r--src/core/hle/kernel/vm_manager.cpp118
-rw-r--r--src/core/hle/kernel/vm_manager.h38
-rw-r--r--src/core/hle/service/apt/apt.cpp21
-rw-r--r--src/core/hle/service/gsp_gpu.cpp31
-rw-r--r--src/core/hle/service/gsp_gpu.h11
-rw-r--r--src/core/hle/service/y2r_u.cpp1
-rw-r--r--src/core/hle/shared_page.cpp3
-rw-r--r--src/core/hle/shared_page.h1
-rw-r--r--src/core/hle/svc.cpp162
-rw-r--r--src/core/hw/gpu.cpp77
-rw-r--r--src/core/hw/gpu.h32
-rw-r--r--src/core/mem_map.cpp163
-rw-r--r--src/core/mem_map.h46
-rw-r--r--src/core/memory.cpp40
-rw-r--r--src/core/memory.h22
-rw-r--r--src/core/memory_setup.h3
-rw-r--r--src/core/system.cpp3
-rw-r--r--src/video_core/command_processor.cpp4
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp8
-rw-r--r--src/video_core/pica.h14
-rw-r--r--src/video_core/rasterizer.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp23
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h4
-rw-r--r--src/video_core/shader/shader.cpp5
-rw-r--r--src/video_core/shader/shader_interpreter.cpp79
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp252
-rw-r--r--src/video_core/shader/shader_jit_x64.h15
66 files changed, 2436 insertions, 1339 deletions
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index d6fcb66a5..46f4a07c9 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -6,6 +6,9 @@
#include <thread>
#include <iostream>
+// This needs to be included before getopt.h because the latter #defines symbols used by it
+#include "common/microprofile.h"
+
#ifdef _MSC_VER
#include <getopt.h>
#else
@@ -59,6 +62,8 @@ int main(int argc, char **argv) {
Log::Filter log_filter(Log::Level::Debug);
Log::SetFilter(&log_filter);
+ MicroProfileOnThreadCreate("EmuThread");
+
if (boot_filename.empty()) {
LOG_CRITICAL(Frontend, "Failed to load ROM: No ROM specified");
return -1;
@@ -89,5 +94,7 @@ int main(int argc, char **argv) {
delete emu_window;
+ MicroProfileShutdown();
+
return 0;
}
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 0c0515054..a82e8a85b 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -18,6 +18,7 @@ set(SRCS
debugger/ramview.cpp
debugger/registers.cpp
util/spinbox.cpp
+ util/util.cpp
bootmanager.cpp
hotkeys.cpp
main.cpp
@@ -42,6 +43,7 @@ set(HEADERS
debugger/ramview.h
debugger/registers.h
util/spinbox.h
+ util/util.h
bootmanager.h
hotkeys.h
main.h
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index a96fbea5f..f8aacb527 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -14,6 +14,7 @@
#include "common/string_util.h"
#include "common/scm_rev.h"
#include "common/key_map.h"
+#include "common/microprofile.h"
#include "core/core.h"
#include "core/settings.h"
@@ -37,6 +38,8 @@ EmuThread::EmuThread(GRenderWindow* render_window) :
void EmuThread::run() {
render_window->MakeCurrent();
+ MicroProfileOnThreadCreate("EmuThread");
+
stop_run = false;
// holds whether the cpu was running during the last iteration,
@@ -69,6 +72,8 @@ void EmuThread::run() {
}
}
+ MicroProfileOnThreadExit();
+
render_window->moveContext();
}
diff --git a/src/citra_qt/debugger/graphics.cpp b/src/citra_qt/debugger/graphics.cpp
index 7424671f1..7d15028f0 100644
--- a/src/citra_qt/debugger/graphics.cpp
+++ b/src/citra_qt/debugger/graphics.cpp
@@ -7,6 +7,8 @@
#include <QVBoxLayout>
#include <QDebug>
+#include "citra_qt/util/util.h"
+
extern GraphicsDebugger g_debugger;
GPUCommandStreamItemModel::GPUCommandStreamItemModel(QObject* parent) : QAbstractListModel(parent), command_count(0)
@@ -79,7 +81,7 @@ GPUCommandStreamWidget::GPUCommandStreamWidget(QWidget* parent) : QDockWidget(tr
QListView* command_list = new QListView;
command_list->setModel(command_model);
- command_list->setFont(QFont("monospace"));
+ command_list->setFont(GetMonospaceFont());
setWidget(command_list);
}
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 35a3140b2..025434687 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -14,6 +14,8 @@
#include <QSpinBox>
#include <QComboBox>
+#include "citra_qt/util/util.h"
+
#include "common/vector_math.h"
#include "video_core/debug_utils/debug_utils.h"
@@ -303,9 +305,7 @@ GPUCommandListWidget::GPUCommandListWidget(QWidget* parent) : QDockWidget(tr("Pi
list_widget = new QTreeView;
list_widget->setModel(model);
- QFont font("monospace");
- font.setStyleHint(QFont::Monospace); // Automatic fallback to a monospace font on on platforms without a font called "monospace"
- list_widget->setFont(font);
+ list_widget->setFont(GetMonospaceFont());
list_widget->setRootIsDecorated(false);
list_widget->setUniformRowHeights(true);
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 0c17edee0..1d9a00e89 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -15,6 +15,8 @@
#include <QSpinBox>
#include <QTreeView>
+#include "citra_qt/util/util.h"
+
#include "video_core/shader/shader.h"
#include "graphics_vertex_shader.h"
@@ -245,7 +247,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
}
case Qt::FontRole:
- return QFont("monospace");
+ return GetMonospaceFont();
case Qt::BackgroundRole:
// Highlight instructions which have no debug data associated to them
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
index 89b28c2f4..5261d4836 100644
--- a/src/citra_qt/debugger/profiler.cpp
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -2,9 +2,21 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <QMouseEvent>
+#include <QPainter>
+#include <QString>
+
#include "profiler.h"
+#include "citra_qt/util/util.h"
+
#include "common/profiler_reporting.h"
+#include "common/microprofile.h"
+
+// Include the implementation of the UI in this file. This isn't in microprofile.cpp because the
+// non-Qt frontends don't need it (and don't implement the UI drawing hooks either).
+#define MICROPROFILEUI_IMPL 1
+#include "common/microprofileui.h"
using namespace Common::Profiling;
@@ -136,3 +148,193 @@ void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
update_timer.stop();
}
}
+
+class MicroProfileWidget : public QWidget {
+public:
+ MicroProfileWidget(QWidget* parent = 0);
+
+protected:
+ void paintEvent(QPaintEvent* ev) override;
+ void showEvent(QShowEvent* ev) override;
+ void hideEvent(QHideEvent* ev) override;
+
+ void mouseMoveEvent(QMouseEvent* ev) override;
+ void mousePressEvent(QMouseEvent* ev) override;
+ void mouseReleaseEvent(QMouseEvent* ev) override;
+ void wheelEvent(QWheelEvent* ev) override;
+
+ void keyPressEvent(QKeyEvent* ev) override;
+ void keyReleaseEvent(QKeyEvent* ev) override;
+
+private:
+ /// This timer is used to redraw the widget's contents continuously. To save resources, it only
+ /// runs while the widget is visible.
+ QTimer update_timer;
+};
+
+MicroProfileDialog::MicroProfileDialog(QWidget* parent)
+ : QWidget(parent, Qt::Dialog)
+{
+ setObjectName("MicroProfile");
+ setWindowTitle(tr("MicroProfile"));
+ resize(1000, 600);
+ // Remove the "?" button from the titlebar and enable the maximize button
+ setWindowFlags(windowFlags() & ~Qt::WindowContextHelpButtonHint | Qt::WindowMaximizeButtonHint);
+
+ MicroProfileWidget* widget = new MicroProfileWidget(this);
+
+ QLayout* layout = new QVBoxLayout(this);
+ layout->setContentsMargins(0, 0, 0, 0);
+ layout->addWidget(widget);
+ setLayout(layout);
+
+ // Configure focus so that widget is focusable and the dialog automatically forwards focus to it.
+ setFocusProxy(widget);
+ widget->setFocusPolicy(Qt::StrongFocus);
+ widget->setFocus();
+}
+
+QAction* MicroProfileDialog::toggleViewAction() {
+ if (toggle_view_action == nullptr) {
+ toggle_view_action = new QAction(windowTitle(), this);
+ toggle_view_action->setCheckable(true);
+ toggle_view_action->setChecked(isVisible());
+ connect(toggle_view_action, SIGNAL(toggled(bool)), SLOT(setVisible(bool)));
+ }
+
+ return toggle_view_action;
+}
+
+void MicroProfileDialog::showEvent(QShowEvent* ev) {
+ if (toggle_view_action) {
+ toggle_view_action->setChecked(isVisible());
+ }
+ QWidget::showEvent(ev);
+}
+
+void MicroProfileDialog::hideEvent(QHideEvent* ev) {
+ if (toggle_view_action) {
+ toggle_view_action->setChecked(isVisible());
+ }
+ QWidget::hideEvent(ev);
+}
+
+/// There's no way to pass a user pointer to MicroProfile, so this variable is used to make the
+/// QPainter available inside the drawing callbacks.
+static QPainter* mp_painter = nullptr;
+
+MicroProfileWidget::MicroProfileWidget(QWidget* parent) : QWidget(parent) {
+ // Send mouse motion events even when not dragging.
+ setMouseTracking(true);
+
+ MicroProfileSetDisplayMode(1); // Timers screen
+ MicroProfileInitUI();
+
+ connect(&update_timer, SIGNAL(timeout()), SLOT(update()));
+}
+
+void MicroProfileWidget::paintEvent(QPaintEvent* ev) {
+ QPainter painter(this);
+
+ painter.setBackground(Qt::black);
+ painter.eraseRect(rect());
+
+ QFont font = GetMonospaceFont();
+ font.setPixelSize(MICROPROFILE_TEXT_HEIGHT);
+ painter.setFont(font);
+
+ mp_painter = &painter;
+ MicroProfileDraw(rect().width(), rect().height());
+ mp_painter = nullptr;
+}
+
+void MicroProfileWidget::showEvent(QShowEvent* ev) {
+ update_timer.start(15); // ~60 Hz
+ QWidget::showEvent(ev);
+}
+
+void MicroProfileWidget::hideEvent(QHideEvent* ev) {
+ update_timer.stop();
+ QWidget::hideEvent(ev);
+}
+
+void MicroProfileWidget::mouseMoveEvent(QMouseEvent* ev) {
+ MicroProfileMousePosition(ev->x(), ev->y(), 0);
+ ev->accept();
+}
+
+void MicroProfileWidget::mousePressEvent(QMouseEvent* ev) {
+ MicroProfileMousePosition(ev->x(), ev->y(), 0);
+ MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
+ ev->accept();
+}
+
+void MicroProfileWidget::mouseReleaseEvent(QMouseEvent* ev) {
+ MicroProfileMousePosition(ev->x(), ev->y(), 0);
+ MicroProfileMouseButton(ev->buttons() & Qt::LeftButton, ev->buttons() & Qt::RightButton);
+ ev->accept();
+}
+
+void MicroProfileWidget::wheelEvent(QWheelEvent* ev) {
+ MicroProfileMousePosition(ev->x(), ev->y(), ev->delta() / 120);
+ ev->accept();
+}
+
+void MicroProfileWidget::keyPressEvent(QKeyEvent* ev) {
+ if (ev->key() == Qt::Key_Control) {
+ // Inform MicroProfile that the user is holding Ctrl.
+ MicroProfileModKey(1);
+ }
+ QWidget::keyPressEvent(ev);
+}
+
+void MicroProfileWidget::keyReleaseEvent(QKeyEvent* ev) {
+ if (ev->key() == Qt::Key_Control) {
+ MicroProfileModKey(0);
+ }
+ QWidget::keyReleaseEvent(ev);
+}
+
+// These functions are called by MicroProfileDraw to draw the interface elements on the screen.
+
+void MicroProfileDrawText(int x, int y, u32 hex_color, const char* text, u32 text_length) {
+ // hex_color does not include an alpha, so it must be assumed to be 255
+ mp_painter->setPen(QColor::fromRgb(hex_color));
+
+ // It's impossible to draw a string using a monospaced font with a fixed width per cell in a
+ // way that's reliable across different platforms and fonts as far as I (yuriks) can tell, so
+ // draw each character individually in order to precisely control the text advance.
+ for (u32 i = 0; i < text_length; ++i) {
+ // Position the text baseline 1 pixel above the bottom of the text cell, this gives nice
+ // vertical alignment of text for a wide range of tested fonts.
+ mp_painter->drawText(x, y + MICROPROFILE_TEXT_HEIGHT - 2, QChar(text[i]));
+ x += MICROPROFILE_TEXT_WIDTH + 1;
+ }
+}
+
+void MicroProfileDrawBox(int left, int top, int right, int bottom, u32 hex_color, MicroProfileBoxType type) {
+ QColor color = QColor::fromRgba(hex_color);
+ QBrush brush = color;
+ if (type == MicroProfileBoxTypeBar) {
+ QLinearGradient gradient(left, top, left, bottom);
+ gradient.setColorAt(0.f, color.lighter(125));
+ gradient.setColorAt(1.f, color.darker(125));
+ brush = gradient;
+ }
+ mp_painter->fillRect(left, top, right - left, bottom - top, brush);
+}
+
+void MicroProfileDrawLine2D(u32 vertices_length, float* vertices, u32 hex_color) {
+ // Temporary vector used to convert between the float array and QPointF. Marked static to reuse
+ // the allocation across calls.
+ static std::vector<QPointF> point_buf;
+
+ for (u32 i = 0; i < vertices_length; ++i) {
+ point_buf.emplace_back(vertices[i*2 + 0], vertices[i*2 + 1]);
+ }
+
+ // hex_color does not include an alpha, so it must be assumed to be 255
+ mp_painter->setPen(QColor::fromRgb(hex_color));
+ mp_painter->drawPolyline(point_buf.data(), vertices_length);
+ point_buf.clear();
+}
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
index fabf279b8..2199eaef1 100644
--- a/src/citra_qt/debugger/profiler.h
+++ b/src/citra_qt/debugger/profiler.h
@@ -48,3 +48,20 @@ private:
QTimer update_timer;
};
+
+class MicroProfileDialog : public QWidget {
+ Q_OBJECT
+
+public:
+ MicroProfileDialog(QWidget* parent = 0);
+
+ /// Returns a QAction that can be used to toggle visibility of this dialog.
+ QAction* toggleViewAction();
+
+protected:
+ void showEvent(QShowEvent* ev) override;
+ void hideEvent(QHideEvent* ev) override;
+
+private:
+ QAction* toggle_view_action = nullptr;
+};
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index a1a4865bd..7fb1b0dcb 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -17,6 +17,7 @@
#include "common/logging/backend.h"
#include "common/logging/filter.h"
#include "common/make_unique.h"
+#include "common/microprofile.h"
#include "common/platform.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
@@ -64,6 +65,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
profilerWidget->hide();
+ microProfileDialog = new MicroProfileDialog(this);
+ microProfileDialog->hide();
+
disasmWidget = new DisassemblerWidget(this, emu_thread.get());
addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
disasmWidget->hide();
@@ -102,6 +106,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
debug_menu->addAction(profilerWidget->toggleViewAction());
+ debug_menu->addAction(microProfileDialog->toggleViewAction());
debug_menu->addAction(disasmWidget->toggleViewAction());
debug_menu->addAction(registersWidget->toggleViewAction());
debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -128,6 +133,8 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
restoreGeometry(settings.value("geometry").toByteArray());
restoreState(settings.value("state").toByteArray());
render_window->restoreGeometry(settings.value("geometryRenderWindow").toByteArray());
+ microProfileDialog->restoreGeometry(settings.value("microProfileDialogGeometry").toByteArray());
+ microProfileDialog->setVisible(settings.value("microProfileDialogVisible").toBool());
ui.action_Use_Hardware_Renderer->setChecked(Settings::values.use_hw_renderer);
SetHardwareRendererEnabled(ui.action_Use_Hardware_Renderer->isChecked());
@@ -287,6 +294,17 @@ void GMainWindow::ShutdownGame() {
render_window->hide();
}
+void GMainWindow::StoreRecentFile(const QString& filename)
+{
+ QSettings settings;
+ QStringList recent_files = settings.value("recentFiles").toStringList();
+ recent_files.prepend(filename);
+ recent_files.removeDuplicates();
+ settings.setValue("recentFiles", recent_files);
+
+ UpdateRecentFiles();
+}
+
void GMainWindow::UpdateRecentFiles() {
QSettings settings;
QStringList recent_files = settings.value("recentFiles").toStringList();
@@ -297,6 +315,7 @@ void GMainWindow::UpdateRecentFiles() {
QString text = QString("&%1. %2").arg(i + 1).arg(QFileInfo(recent_files[i]).fileName());
actions_recent_files[i]->setText(text);
actions_recent_files[i]->setData(recent_files[i]);
+ actions_recent_files[i]->setToolTip(recent_files[i]);
actions_recent_files[i]->setVisible(true);
}
@@ -319,11 +338,7 @@ void GMainWindow::OnMenuLoadFile() {
QString filename = QFileDialog::getOpenFileName(this, tr("Load File"), rom_path, tr("3DS executable (*.3ds *.3dsx *.elf *.axf *.cci *.cxi)"));
if (filename.size()) {
settings.setValue("romsPath", QFileInfo(filename).path());
- // Update recent files list
- QStringList recent_files = settings.value("recentFiles").toStringList();
- recent_files.prepend(filename);
- settings.setValue("recentFiles", recent_files);
- UpdateRecentFiles(); // Update UI
+ StoreRecentFile(filename);
BootGame(filename.toLatin1().data());
}
@@ -349,6 +364,7 @@ void GMainWindow::OnMenuRecentFile() {
QFileInfo file_info(filename);
if (file_info.exists()) {
BootGame(filename.toLatin1().data());
+ StoreRecentFile(filename); // Put the filename on top of the list
} else {
// Display an error message and remove the file from the list.
QMessageBox::information(this, tr("File not found"), tr("File \"%1\" not found").arg(filename));
@@ -357,12 +373,7 @@ void GMainWindow::OnMenuRecentFile() {
QStringList recent_files = settings.value("recentFiles").toStringList();
recent_files.removeOne(filename);
settings.setValue("recentFiles", recent_files);
-
- action->setVisible(false);
- // Grey out the recent files menu if the list is empty
- if (ui.menu_recent_files->isEmpty()) {
- ui.menu_recent_files->setEnabled(false);
- }
+ UpdateRecentFiles();
}
}
@@ -430,6 +441,8 @@ void GMainWindow::closeEvent(QCloseEvent* event) {
settings.setValue("geometry", saveGeometry());
settings.setValue("state", saveState());
settings.setValue("geometryRenderWindow", render_window->saveGeometry());
+ settings.setValue("microProfileDialogGeometry", microProfileDialog->saveGeometry());
+ settings.setValue("microProfileDialogVisible", microProfileDialog->isVisible());
settings.setValue("singleWindowMode", ui.action_Single_Window_Mode->isChecked());
settings.setValue("displayTitleBars", ui.actionDisplay_widget_title_bars->isChecked());
settings.setValue("firstStart", false);
@@ -452,6 +465,11 @@ int main(int argc, char* argv[]) {
Log::Filter log_filter(Log::Level::Info);
Log::SetFilter(&log_filter);
+ MicroProfileOnThreadCreate("Frontend");
+ SCOPE_EXIT({
+ MicroProfileShutdown();
+ });
+
// Init settings params
QSettings::setDefaultFormat(QSettings::IniFormat);
QCoreApplication::setOrganizationName("Citra team");
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index 4b260ae8b..32523fded 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -14,6 +14,7 @@ class GImageInfo;
class GRenderWindow;
class EmuThread;
class ProfilerWidget;
+class MicroProfileDialog;
class DisassemblerWidget;
class RegistersWidget;
class CallstackWidget;
@@ -60,6 +61,24 @@ private:
void BootGame(const std::string& filename);
void ShutdownGame();
+ /**
+ * Stores the filename in the recently loaded files list.
+ * The new filename is stored at the beginning of the recently loaded files list.
+ * After inserting the new entry, duplicates are removed meaning that if
+ * this was inserted from \a OnMenuRecentFile(), the entry will be put on top
+ * and remove from its previous position.
+ *
+ * Finally, this function calls \a UpdateRecentFiles() to update the UI.
+ *
+ * @param filename the filename to store
+ */
+ void StoreRecentFile(const QString& filename);
+
+ /**
+ * Updates the recent files menu.
+ * Menu entries are rebuilt from the configuration file.
+ * If there is no entry in the menu, the menu is greyed out.
+ */
void UpdateRecentFiles();
void closeEvent(QCloseEvent* event) override;
@@ -86,6 +105,7 @@ private:
std::unique_ptr<EmuThread> emu_thread;
ProfilerWidget* profilerWidget;
+ MicroProfileDialog* microProfileDialog;
DisassemblerWidget* disasmWidget;
RegistersWidget* registersWidget;
CallstackWidget* callstackWidget;
diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp
new file mode 100644
index 000000000..2cb939af1
--- /dev/null
+++ b/src/citra_qt/util/util.cpp
@@ -0,0 +1,13 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "util.h"
+
+QFont GetMonospaceFont() {
+ QFont font("monospace");
+ // Automatic fallback to a monospace font on on platforms without a font called "monospace"
+ font.setStyleHint(QFont::Monospace);
+ font.setFixedPitch(true);
+ return font;
+}
diff --git a/src/citra_qt/util/util.h b/src/citra_qt/util/util.h
new file mode 100644
index 000000000..98a944047
--- /dev/null
+++ b/src/citra_qt/util/util.h
@@ -0,0 +1,10 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <QFont>
+
+/// Returns a QFont object appropriate to use as a monospace font for debugging widgets, etc.
+QFont GetMonospaceFont();
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index e743a026d..7f3712efa 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -11,6 +11,7 @@ set(SRCS
logging/text_formatter.cpp
logging/backend.cpp
memory_util.cpp
+ microprofile.cpp
misc.cpp
profiler.cpp
scm_rev.cpp
@@ -43,6 +44,8 @@ set(HEADERS
make_unique.h
math_util.h
memory_util.h
+ microprofile.h
+ microprofileui.h
platform.h
profiler.h
profiler_reporting.h
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index 88e452a16..ed20c3629 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -45,14 +45,20 @@
// GCC 4.8 defines all the rotate functions now
// Small issue with GCC's lrotl/lrotr intrinsics is they are still 32bit while we require 64bit
-#ifndef _rotl
-inline u32 _rotl(u32 x, int shift) {
+#ifdef _rotl
+#define rotl _rotl
+#else
+inline u32 rotl(u32 x, int shift) {
shift &= 31;
if (!shift) return x;
return (x << shift) | (x >> (32 - shift));
}
+#endif
-inline u32 _rotr(u32 x, int shift) {
+#ifdef _rotr
+#define rotr _rotr
+#else
+inline u32 rotr(u32 x, int shift) {
shift &= 31;
if (!shift) return x;
return (x >> shift) | (x << (32 - shift));
diff --git a/src/common/file_util.h b/src/common/file_util.h
index d0dccdf69..e71a9b2fa 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -244,7 +244,7 @@ private:
template <typename T>
void OpenFStream(T& fstream, const std::string& filename, std::ios_base::openmode openmode)
{
-#ifdef _WIN32
+#ifdef _MSC_VER
fstream.open(Common::UTF8ToTStr(filename).c_str(), openmode);
#else
fstream.open(filename.c_str(), openmode);
diff --git a/src/common/logging/log.h b/src/common/logging/log.h
index e16dde7fc..5fd3bd7f5 100644
--- a/src/common/logging/log.h
+++ b/src/common/logging/log.h
@@ -91,17 +91,16 @@ void LogMessage(Class log_class, Level log_level,
} // namespace Log
#define LOG_GENERIC(log_class, log_level, ...) \
- ::Log::LogMessage(::Log::Class::log_class, ::Log::Level::log_level, \
- __FILE__, __LINE__, __func__, __VA_ARGS__)
+ ::Log::LogMessage(log_class, log_level, __FILE__, __LINE__, __func__, __VA_ARGS__)
#ifdef _DEBUG
-#define LOG_TRACE( log_class, ...) LOG_GENERIC(log_class, Trace, __VA_ARGS__)
+#define LOG_TRACE( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Trace, __VA_ARGS__)
#else
#define LOG_TRACE( log_class, ...) (void(0))
#endif
-#define LOG_DEBUG( log_class, ...) LOG_GENERIC(log_class, Debug, __VA_ARGS__)
-#define LOG_INFO( log_class, ...) LOG_GENERIC(log_class, Info, __VA_ARGS__)
-#define LOG_WARNING( log_class, ...) LOG_GENERIC(log_class, Warning, __VA_ARGS__)
-#define LOG_ERROR( log_class, ...) LOG_GENERIC(log_class, Error, __VA_ARGS__)
-#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(log_class, Critical, __VA_ARGS__)
+#define LOG_DEBUG( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Debug, __VA_ARGS__)
+#define LOG_INFO( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Info, __VA_ARGS__)
+#define LOG_WARNING( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Warning, __VA_ARGS__)
+#define LOG_ERROR( log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Error, __VA_ARGS__)
+#define LOG_CRITICAL(log_class, ...) LOG_GENERIC(::Log::Class::log_class, ::Log::Level::Critical, __VA_ARGS__)
diff --git a/src/common/microprofile.cpp b/src/common/microprofile.cpp
new file mode 100644
index 000000000..ee25dd37f
--- /dev/null
+++ b/src/common/microprofile.cpp
@@ -0,0 +1,7 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// Includes the MicroProfile implementation in this file for compilation
+#define MICROPROFILE_IMPL 1
+#include "common/microprofile.h"
diff --git a/src/common/microprofile.h b/src/common/microprofile.h
new file mode 100644
index 000000000..9eb6016a8
--- /dev/null
+++ b/src/common/microprofile.h
@@ -0,0 +1,25 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+// Customized Citra settings.
+// This file wraps the MicroProfile header so that these are consistent everywhere.
+#define MICROPROFILE_WEBSERVER 0
+#define MICROPROFILE_GPU_TIMERS 0 // TODO: Implement timer queries when we upgrade to OpenGL 3.3
+#define MICROPROFILE_CONTEXT_SWITCH_TRACE 0
+#define MICROPROFILE_PER_THREAD_BUFFER_SIZE (2048<<12) // 8 MB
+
+#include <microprofile.h>
+
+#define MP_RGB(r, g, b) ((r) << 16 | (g) << 8 | (b) << 0)
+
+// On OS X, some Mach header included by MicroProfile defines these as macros, conflicting with
+// identifiers we use.
+#ifdef PAGE_SIZE
+#undef PAGE_SIZE
+#endif
+#ifdef PAGE_MASK
+#undef PAGE_MASK
+#endif
diff --git a/src/common/microprofileui.h b/src/common/microprofileui.h
new file mode 100644
index 000000000..97c369bd9
--- /dev/null
+++ b/src/common/microprofileui.h
@@ -0,0 +1,16 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/microprofile.h"
+
+// Customized Citra settings.
+// This file wraps the MicroProfile header so that these are consistent everywhere.
+#define MICROPROFILE_TEXT_WIDTH 6
+#define MICROPROFILE_TEXT_HEIGHT 12
+#define MICROPROFILE_HELP_ALT "Right-Click"
+#define MICROPROFILE_HELP_MOD "Ctrl"
+
+#include <microprofileui.h>
diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp
index 4b79acd1f..939df210e 100644
--- a/src/common/x64/emitter.cpp
+++ b/src/common/x64/emitter.cpp
@@ -15,6 +15,7 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
+#include <cinttypes>
#include <cstring>
#include "common/assert.h"
@@ -25,11 +26,6 @@
#include "cpu_detect.h"
#include "emitter.h"
-#define PRIx64 "llx"
-
-// Minimize the diff against Dolphin
-#define DYNA_REC JIT
-
namespace Gen
{
@@ -113,6 +109,29 @@ u8 *XEmitter::GetWritableCodePtr()
return code;
}
+void XEmitter::Write8(u8 value)
+{
+ *code++ = value;
+}
+
+void XEmitter::Write16(u16 value)
+{
+ std::memcpy(code, &value, sizeof(u16));
+ code += sizeof(u16);
+}
+
+void XEmitter::Write32(u32 value)
+{
+ std::memcpy(code, &value, sizeof(u32));
+ code += sizeof(u32);
+}
+
+void XEmitter::Write64(u64 value)
+{
+ std::memcpy(code, &value, sizeof(u64));
+ code += sizeof(u64);
+}
+
void XEmitter::ReserveCodeSpace(int bytes)
{
for (int i = 0; i < bytes; i++)
@@ -374,7 +393,7 @@ void XEmitter::Rex(int w, int r, int x, int b)
Write8(rx);
}
-void XEmitter::JMP(const u8 *addr, bool force5Bytes)
+void XEmitter::JMP(const u8* addr, bool force5Bytes)
{
u64 fn = (u64)addr;
if (!force5Bytes)
@@ -398,7 +417,7 @@ void XEmitter::JMP(const u8 *addr, bool force5Bytes)
}
}
-void XEmitter::JMPptr(const OpArg &arg2)
+void XEmitter::JMPptr(const OpArg& arg2)
{
OpArg arg = arg2;
if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument");
@@ -425,7 +444,7 @@ void XEmitter::CALLptr(OpArg arg)
arg.WriteRest(this);
}
-void XEmitter::CALL(const void *fnptr)
+void XEmitter::CALL(const void* fnptr)
{
u64 distance = u64(fnptr) - (u64(code) + 5);
ASSERT_MSG(
@@ -496,7 +515,7 @@ void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes)
}
}
-void XEmitter::SetJumpTarget(const FixupBranch &branch)
+void XEmitter::SetJumpTarget(const FixupBranch& branch)
{
if (branch.type == 0)
{
@@ -512,30 +531,6 @@ void XEmitter::SetJumpTarget(const FixupBranch &branch)
}
}
-// INC/DEC considered harmful on newer CPUs due to partial flag set.
-// Use ADD, SUB instead.
-
-/*
-void XEmitter::INC(int bits, OpArg arg)
-{
- if (arg.IsImm()) ASSERT_MSG(0, "INC - Imm argument");
- arg.operandReg = 0;
- if (bits == 16) {Write8(0x66);}
- arg.WriteRex(this, bits, bits);
- Write8(bits == 8 ? 0xFE : 0xFF);
- arg.WriteRest(this);
-}
-void XEmitter::DEC(int bits, OpArg arg)
-{
- if (arg.IsImm()) ASSERT_MSG(0, "DEC - Imm argument");
- arg.operandReg = 1;
- if (bits == 16) {Write8(0x66);}
- arg.WriteRex(this, bits, bits);
- Write8(bits == 8 ? 0xFE : 0xFF);
- arg.WriteRest(this);
-}
-*/
-
//Single byte opcodes
//There is no PUSHAD/POPAD in 64-bit mode.
void XEmitter::INT3() {Write8(0xCC);}
@@ -667,7 +662,7 @@ void XEmitter::CBW(int bits)
void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);}
void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);}
-void XEmitter::PUSH(int bits, const OpArg &reg)
+void XEmitter::PUSH(int bits, const OpArg& reg)
{
if (reg.IsSimpleReg())
PUSH(reg.GetSimpleReg());
@@ -703,7 +698,7 @@ void XEmitter::PUSH(int bits, const OpArg &reg)
}
}
-void XEmitter::POP(int /*bits*/, const OpArg &reg)
+void XEmitter::POP(int /*bits*/, const OpArg& reg)
{
if (reg.IsSimpleReg())
POP(reg.GetSimpleReg());
@@ -791,12 +786,12 @@ void XEmitter::WriteMulDivType(int bits, OpArg src, int ext)
src.WriteRest(this);
}
-void XEmitter::MUL(int bits, OpArg src) {WriteMulDivType(bits, src, 4);}
-void XEmitter::DIV(int bits, OpArg src) {WriteMulDivType(bits, src, 6);}
-void XEmitter::IMUL(int bits, OpArg src) {WriteMulDivType(bits, src, 5);}
-void XEmitter::IDIV(int bits, OpArg src) {WriteMulDivType(bits, src, 7);}
-void XEmitter::NEG(int bits, OpArg src) {WriteMulDivType(bits, src, 3);}
-void XEmitter::NOT(int bits, OpArg src) {WriteMulDivType(bits, src, 2);}
+void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);}
+void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);}
+void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);}
+void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);}
+void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);}
+void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);}
void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep)
{
@@ -813,24 +808,24 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo
src.WriteRest(this);
}
-void XEmitter::MOVNTI(int bits, OpArg dest, X64Reg src)
+void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src)
{
if (bits <= 16)
ASSERT_MSG(0, "MOVNTI - bits<=16");
WriteBitSearchType(bits, src, dest, 0xC3);
}
-void XEmitter::BSF(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBC);} //bottom bit to top bit
-void XEmitter::BSR(int bits, X64Reg dest, OpArg src) {WriteBitSearchType(bits,dest,src,0xBD);} //top bit to bottom bit
+void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit
+void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit
-void XEmitter::TZCNT(int bits, X64Reg dest, OpArg src)
+void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src)
{
CheckFlags();
if (!Common::GetCPUCaps().bmi1)
ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
WriteBitSearchType(bits, dest, src, 0xBC, true);
}
-void XEmitter::LZCNT(int bits, X64Reg dest, OpArg src)
+void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src)
{
CheckFlags();
if (!Common::GetCPUCaps().lzcnt)
@@ -950,7 +945,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
}
//shift can be either imm8 or cl
-void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
+void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext)
{
CheckFlags();
bool writeImm = false;
@@ -991,16 +986,16 @@ void XEmitter::WriteShift(int bits, OpArg dest, OpArg &shift, int ext)
// large rotates and shift are slower on intel than amd
// intel likes to rotate by 1, and the op is smaller too
-void XEmitter::ROL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 0);}
-void XEmitter::ROR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 1);}
-void XEmitter::RCL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 2);}
-void XEmitter::RCR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 3);}
-void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 4);}
-void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);}
-void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);}
+void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);}
+void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);}
+void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);}
+void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);}
+void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);}
+void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);}
+void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);}
// index can be either imm8 or register, don't use memory destination because it's slow
-void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
+void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext)
{
CheckFlags();
if (dest.IsImm())
@@ -1029,13 +1024,13 @@ void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext)
}
}
-void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);}
-void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);}
-void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);}
-void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);}
+void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);}
+void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);}
+void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);}
+void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);}
//shift can be either imm8 or cl
-void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
+void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
{
CheckFlags();
if (dest.IsImm())
@@ -1067,7 +1062,7 @@ void XEmitter::SHRD(int bits, OpArg dest, OpArg src, OpArg shift)
}
}
-void XEmitter::SHLD(int bits, OpArg dest, OpArg src, OpArg shift)
+void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift)
{
CheckFlags();
if (dest.IsImm())
@@ -1111,7 +1106,7 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit
}
//operand can either be immediate or register
-void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const
+void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const
{
X64Reg _operandReg;
if (IsImm())
@@ -1257,7 +1252,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &o
}
}
-void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2)
+void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2)
{
if (a1.IsImm())
{
@@ -1283,24 +1278,24 @@ void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg
}
}
-void XEmitter::ADD (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
-void XEmitter::ADC (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
-void XEmitter::SUB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
-void XEmitter::SBB (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
-void XEmitter::AND (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
-void XEmitter::OR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
-void XEmitter::XOR (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
-void XEmitter::MOV (int bits, const OpArg &a1, const OpArg &a2)
+void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);}
+void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);}
+void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);}
+void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);}
+void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);}
+void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);}
+void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);}
+void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2)
{
if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg())
LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code);
WriteNormalOp(this, bits, nrmMOV, a1, a2);
}
-void XEmitter::TEST(int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
-void XEmitter::CMP (int bits, const OpArg &a1, const OpArg &a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
-void XEmitter::XCHG(int bits, const OpArg &a1, const OpArg &a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
+void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);}
+void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);}
+void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);}
-void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
+void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2)
{
CheckFlags();
if (bits == 8)
@@ -1353,7 +1348,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a1, OpArg a2)
}
}
-void XEmitter::IMUL(int bits, X64Reg regOp, OpArg a)
+void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a)
{
CheckFlags();
if (bits == 8)
@@ -1390,7 +1385,7 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr
arg.WriteRest(this, extrabytes);
}
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
{
WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes);
}
@@ -1400,25 +1395,25 @@ static int GetVEXmmmmm(u16 op)
// Currently, only 0x38 and 0x3A are used as secondary escape byte.
if ((op >> 8) == 0x3A)
return 3;
- else if ((op >> 8) == 0x38)
+ if ((op >> 8) == 0x38)
return 2;
- else
- return 1;
+
+ return 1;
}
static int GetVEXpp(u8 opPrefix)
{
if (opPrefix == 0x66)
return 1;
- else if (opPrefix == 0xF3)
+ if (opPrefix == 0xF3)
return 2;
- else if (opPrefix == 0xF2)
+ if (opPrefix == 0xF2)
return 3;
- else
- return 0;
+
+ return 0;
}
-void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
if (!Common::GetCPUCaps().avx)
ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer.");
@@ -1431,7 +1426,7 @@ void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpA
}
// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
-void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
if (size != 32 && size != 64)
ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!");
@@ -1442,7 +1437,7 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r
arg.WriteRest(this, extrabytes, regOp1);
}
-void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
CheckFlags();
if (!Common::GetCPUCaps().bmi1)
@@ -1450,7 +1445,7 @@ void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg
WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}
-void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
+void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes)
{
CheckFlags();
if (!Common::GetCPUCaps().bmi2)
@@ -1517,135 +1512,136 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext)
arg.WriteRest(this);
}
-void XEmitter::STMXCSR(OpArg memloc) {WriteMXCSR(memloc, 3);}
-void XEmitter::LDMXCSR(OpArg memloc) {WriteMXCSR(memloc, 2);}
-
-void XEmitter::MOVNTDQ(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
-void XEmitter::MOVNTPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
-void XEmitter::MOVNTPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
-
-void XEmitter::ADDSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
-void XEmitter::ADDSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
-void XEmitter::SUBSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
-void XEmitter::SUBSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
-void XEmitter::CMPSS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::CMPSD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::MULSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
-void XEmitter::MULSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
-void XEmitter::DIVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
-void XEmitter::DIVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
-void XEmitter::MINSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
-void XEmitter::MINSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
-void XEmitter::MAXSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
-void XEmitter::MAXSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
-void XEmitter::SQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
-void XEmitter::SQRTSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
-void XEmitter::RSQRTSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
-
-void XEmitter::ADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
-void XEmitter::ADDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
-void XEmitter::SUBPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
-void XEmitter::SUBPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
-void XEmitter::CMPPS(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::CMPPD(X64Reg regOp, OpArg arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
-void XEmitter::ANDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
-void XEmitter::ANDPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
-void XEmitter::ANDNPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
-void XEmitter::ANDNPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
-void XEmitter::ORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
-void XEmitter::ORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
-void XEmitter::XORPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
-void XEmitter::XORPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
-void XEmitter::MULPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
-void XEmitter::MULPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
-void XEmitter::DIVPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
-void XEmitter::DIVPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
-void XEmitter::MINPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
-void XEmitter::MINPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
-void XEmitter::MAXPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
-void XEmitter::MAXPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
-void XEmitter::SQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
-void XEmitter::SQRTPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
-void XEmitter::RCPPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
-void XEmitter::RSQRTPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
-void XEmitter::SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
-void XEmitter::SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
-
-void XEmitter::HADDPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
-
-void XEmitter::COMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
-void XEmitter::COMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
-void XEmitter::UCOMISS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
-void XEmitter::UCOMISD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
-
-void XEmitter::MOVAPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
-void XEmitter::MOVAPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
-void XEmitter::MOVAPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
-void XEmitter::MOVAPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
-
-void XEmitter::MOVUPS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVUPD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVUPS(OpArg arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
-void XEmitter::MOVUPD(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
-
-void XEmitter::MOVDQA(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
-void XEmitter::MOVDQA(OpArg arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
-void XEmitter::MOVDQU(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
-void XEmitter::MOVDQU(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
-
-void XEmitter::MOVSS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVSD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
-void XEmitter::MOVSS(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
-void XEmitter::MOVSD(OpArg arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
-
-void XEmitter::MOVLPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
-void XEmitter::MOVLPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
-void XEmitter::MOVLPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
-void XEmitter::MOVLPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
-
-void XEmitter::MOVHPS(X64Reg regOp, OpArg arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
-void XEmitter::MOVHPD(X64Reg regOp, OpArg arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
-void XEmitter::MOVHPS(OpArg arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
-void XEmitter::MOVHPD(OpArg arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
+void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);}
+void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);}
+
+void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);}
+void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);}
+void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);}
+
+void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);}
+void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);}
+void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);}
+void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);}
+void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);}
+void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);}
+void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);}
+void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);}
+void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);}
+void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);}
+void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);}
+void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);}
+void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);}
+void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);}
+void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);}
+void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);}
+
+void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);}
+void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);}
+void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);}
+void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);}
+void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);}
+void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);}
+void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);}
+void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);}
+void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);}
+void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);}
+void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);}
+void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);}
+void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);}
+void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);}
+void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);}
+void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);}
+void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);}
+void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);}
+void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);}
+void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);}
+void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);}
+void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);}
+void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);}
+void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); }
+void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);}
+void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);}
+void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);}
+
+void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);}
+
+void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed
+void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered
+void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered
+void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);}
+
+void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);}
+void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);}
+void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);}
+void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);}
+
+void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);}
+
+void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);}
+void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);}
+void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);}
+void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);}
+
+void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);}
+void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);}
+void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);}
+
+void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); }
+void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); }
+void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); }
+void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); }
+
+void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); }
+void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); }
+void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); }
void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));}
void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));}
-void XEmitter::CVTPS2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
-void XEmitter::CVTPD2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
+void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);}
+void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);}
-void XEmitter::CVTSD2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
-void XEmitter::CVTSS2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
-void XEmitter::CVTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
-void XEmitter::CVTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
-void XEmitter::CVTSI2SD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
-void XEmitter::CVTSI2SS(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
+void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);}
+void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);}
+void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);}
+void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);}
+void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);}
+void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);}
-void XEmitter::CVTDQ2PD(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
-void XEmitter::CVTDQ2PS(X64Reg regOp, OpArg arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
-void XEmitter::CVTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
-void XEmitter::CVTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
+void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);}
+void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);}
+void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);}
+void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);}
-void XEmitter::CVTTSD2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
-void XEmitter::CVTTSS2SI(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
-void XEmitter::CVTTPS2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
-void XEmitter::CVTTPD2DQ(X64Reg regOp, OpArg arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
+void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);}
+void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);}
+void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);}
+void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);}
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));}
-void XEmitter::MOVMSKPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
-void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
+void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);}
+void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);}
-void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
+void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
// THESE TWO ARE UNTESTED.
-void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
-void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
+void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
+void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
-void XEmitter::UNPCKLPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
-void XEmitter::UNPCKHPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
+void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);}
+void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);}
-void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
+void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg)
{
if (Common::GetCPUCaps().sse3)
{
@@ -1663,9 +1659,9 @@ void XEmitter::MOVDDUP(X64Reg regOp, OpArg arg)
//There are a few more left
// Also some integer instructions are missing
-void XEmitter::PACKSSDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
-void XEmitter::PACKSSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
-void XEmitter::PACKUSWB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
+void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);}
+void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);}
+void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);}
void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);}
void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);}
@@ -1690,7 +1686,7 @@ void XEmitter::PSRLQ(X64Reg reg, int shift)
Write8(shift);
}
-void XEmitter::PSRLQ(X64Reg reg, OpArg arg)
+void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg)
{
WriteSSEOp(0x66, 0xd3, reg, arg);
}
@@ -1735,212 +1731,212 @@ void XEmitter::PSRAD(X64Reg reg, int shift)
Write8(shift);
}
-void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
{
if (!Common::GetCPUCaps().ssse3)
ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer.");
WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}
-void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes)
+void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes)
{
if (!Common::GetCPUCaps().sse4_1)
ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer.");
WriteSSEOp(opPrefix, op, regOp, arg, extrabytes);
}
-void XEmitter::PSHUFB(X64Reg dest, OpArg arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
-void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
-void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
-void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
-
-void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
-void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
-void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
-void XEmitter::PMINUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
-void XEmitter::PMAXSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
-void XEmitter::PMAXSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
-void XEmitter::PMAXUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
-void XEmitter::PMAXUD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
-
-void XEmitter::PMOVSXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
-void XEmitter::PMOVSXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
-void XEmitter::PMOVSXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
-void XEmitter::PMOVSXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
-void XEmitter::PMOVSXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
-void XEmitter::PMOVSXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
-void XEmitter::PMOVZXBW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
-void XEmitter::PMOVZXBD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
-void XEmitter::PMOVZXBQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
-void XEmitter::PMOVZXWD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
-void XEmitter::PMOVZXWQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
-void XEmitter::PMOVZXDQ(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
-
-void XEmitter::PBLENDVB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
-void XEmitter::BLENDVPS(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
-void XEmitter::BLENDVPD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
+void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);}
+void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
+void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
+void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
+
+void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
+void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
+void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
+void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);}
+void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);}
+void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);}
+void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);}
+void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);}
+
+void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);}
+void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);}
+void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);}
+void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);}
+void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);}
+void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);}
+void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);}
+void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);}
+void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);}
+void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);}
+void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);}
+void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);}
+
+void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);}
+void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);}
+void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);}
void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); }
void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); }
-void XEmitter::ROUNDSS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDSD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDPS(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
-void XEmitter::ROUNDPD(X64Reg dest, OpArg arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);}
+void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);}
-void XEmitter::PAND(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
-void XEmitter::PANDN(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
-void XEmitter::PXOR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
-void XEmitter::POR(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
+void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);}
+void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);}
+void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);}
+void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);}
-void XEmitter::PADDB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
-void XEmitter::PADDW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
-void XEmitter::PADDD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
-void XEmitter::PADDQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
+void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);}
+void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);}
+void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);}
+void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);}
-void XEmitter::PADDSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
-void XEmitter::PADDSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
-void XEmitter::PADDUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
-void XEmitter::PADDUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
+void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);}
+void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);}
+void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);}
+void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);}
-void XEmitter::PSUBB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
-void XEmitter::PSUBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
-void XEmitter::PSUBD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
-void XEmitter::PSUBQ(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
+void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);}
+void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);}
+void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);}
+void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);}
-void XEmitter::PSUBSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
-void XEmitter::PSUBSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
-void XEmitter::PSUBUSB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
-void XEmitter::PSUBUSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
+void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);}
+void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);}
+void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);}
+void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);}
-void XEmitter::PAVGB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
-void XEmitter::PAVGW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
+void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);}
+void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);}
-void XEmitter::PCMPEQB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
-void XEmitter::PCMPEQW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
-void XEmitter::PCMPEQD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
+void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);}
+void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);}
+void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);}
-void XEmitter::PCMPGTB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
-void XEmitter::PCMPGTW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
-void XEmitter::PCMPGTD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
+void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);}
+void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);}
+void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);}
-void XEmitter::PEXTRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
-void XEmitter::PINSRW(X64Reg dest, OpArg arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
+void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);}
+void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);}
-void XEmitter::PMADDWD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
-void XEmitter::PSADBW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
+void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); }
+void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);}
-void XEmitter::PMAXSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
-void XEmitter::PMAXUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
-void XEmitter::PMINSW(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
-void XEmitter::PMINUB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
+void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); }
+void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); }
+void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); }
+void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); }
-void XEmitter::PMOVMSKB(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
-void XEmitter::PSHUFD(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
-void XEmitter::PSHUFLW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
-void XEmitter::PSHUFHW(X64Reg regOp, OpArg arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); }
+void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);}
+void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);}
// VEX
-void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
-void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
-void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
-void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
-void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
-void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
-void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
-void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
-void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
-void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
-void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
-void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
-
-void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
-void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
-void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
-void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
-void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
-void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
-void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
-void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
-
-void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
-void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
-void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
-void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
-
-void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
-void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
-void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
-void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
-void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
-void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
-void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
-void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
-void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
-void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
-void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
-void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
-void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
-void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
-
-void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
-void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
-void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
-void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
-void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
-void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
-void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
-void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
+void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);}
+void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);}
+void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);}
+void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);}
+void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);}
+void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);}
+void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);}
+void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);}
+void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);}
+void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);}
+void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);}
+void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);}
+
+void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); }
+void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); }
+void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); }
+void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); }
+void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); }
+void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); }
+void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); }
+void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); }
+
+void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); }
+void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); }
+void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); }
+void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); }
+
+void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); }
+void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); }
+void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); }
+void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); }
+void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); }
+void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); }
+void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); }
+void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); }
+void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); }
+void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); }
+void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); }
+void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); }
+void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); }
+void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); }
+
+void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
+void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
+void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
+void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
+void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
+void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
+void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
+void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
+void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
+void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
+void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
+void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
+void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
// Prefixes
@@ -1956,7 +1952,7 @@ void XEmitter::FWAIT()
}
// TODO: make this more generic
-void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg)
+void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg)
{
int mf = 0;
ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction");
@@ -1974,9 +1970,9 @@ void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg a
arg.WriteRest(this, 0, (X64Reg) op);
}
-void XEmitter::FLD(int bits, OpArg src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
-void XEmitter::FST(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
-void XEmitter::FSTP(int bits, OpArg dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
+void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);}
+void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);}
+void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);}
void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); }
void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); }
diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h
index e9c924126..a49cd2cf1 100644
--- a/src/common/x64/emitter.h
+++ b/src/common/x64/emitter.h
@@ -328,8 +328,6 @@ enum SSECompare
ORD,
};
-typedef const u8* JumpTarget;
-
class XEmitter
{
friend struct OpArg; // for Write8 etc
@@ -344,27 +342,27 @@ private:
void WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg);
void WriteMulDivType(int bits, OpArg src, int ext);
void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep = false);
- void WriteShift(int bits, OpArg dest, OpArg &shift, int ext);
- void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext);
+ void WriteShift(int bits, OpArg dest, const OpArg& shift, int ext);
+ void WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext);
void WriteMXCSR(OpArg arg, int ext);
void WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes = 0);
- void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
- void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
- void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
+ void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+ void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+ void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0);
+ void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0);
+ void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg);
+ void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2);
void ABI_CalculateFrameSize(u32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp);
protected:
- inline void Write8(u8 value) {*code++ = value;}
- inline void Write16(u16 value) {*(u16*)code = (value); code += 2;}
- inline void Write32(u32 value) {*(u32*)code = (value); code += 4;}
- inline void Write64(u64 value) {*(u64*)code = (value); code += 8;}
+ void Write8(u8 value);
+ void Write16(u16 value);
+ void Write32(u32 value);
+ void Write64(u64 value);
public:
XEmitter() { code = nullptr; flags_locked = false; }
@@ -413,8 +411,8 @@ public:
// Stack control
void PUSH(X64Reg reg);
void POP(X64Reg reg);
- void PUSH(int bits, const OpArg &reg);
- void POP(int bits, const OpArg &reg);
+ void PUSH(int bits, const OpArg& reg);
+ void POP(int bits, const OpArg& reg);
void PUSHF();
void POPF();
@@ -424,21 +422,19 @@ public:
void UD2();
FixupBranch J(bool force5bytes = false);
- void JMP(const u8 * addr, bool force5Bytes = false);
- void JMP(OpArg arg);
- void JMPptr(const OpArg &arg);
+ void JMP(const u8* addr, bool force5Bytes = false);
+ void JMPptr(const OpArg& arg);
void JMPself(); //infinite loop!
#ifdef CALL
#undef CALL
#endif
- void CALL(const void *fnptr);
+ void CALL(const void* fnptr);
void CALLptr(OpArg arg);
FixupBranch J_CC(CCFlags conditionCode, bool force5bytes = false);
- //void J_CC(CCFlags conditionCode, JumpTarget target);
- void J_CC(CCFlags conditionCode, const u8 * addr, bool force5Bytes = false);
+ void J_CC(CCFlags conditionCode, const u8* addr, bool force5Bytes = false);
- void SetJumpTarget(const FixupBranch &branch);
+ void SetJumpTarget(const FixupBranch& branch);
void SETcc(CCFlags flag, OpArg dest);
// Note: CMOV brings small if any benefit on current cpus.
@@ -450,8 +446,8 @@ public:
void SFENCE();
// Bit scan
- void BSF(int bits, X64Reg dest, OpArg src); //bottom bit to top bit
- void BSR(int bits, X64Reg dest, OpArg src); //top bit to bottom bit
+ void BSF(int bits, X64Reg dest, const OpArg& src); // Bottom bit to top bit
+ void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit
// Cache control
enum PrefetchLevel
@@ -462,67 +458,67 @@ public:
PF_T2, //Levels 3+ (aliased to T0 on AMD)
};
void PREFETCH(PrefetchLevel level, OpArg arg);
- void MOVNTI(int bits, OpArg dest, X64Reg src);
- void MOVNTDQ(OpArg arg, X64Reg regOp);
- void MOVNTPS(OpArg arg, X64Reg regOp);
- void MOVNTPD(OpArg arg, X64Reg regOp);
+ void MOVNTI(int bits, const OpArg& dest, X64Reg src);
+ void MOVNTDQ(const OpArg& arg, X64Reg regOp);
+ void MOVNTPS(const OpArg& arg, X64Reg regOp);
+ void MOVNTPD(const OpArg& arg, X64Reg regOp);
// Multiplication / division
- void MUL(int bits, OpArg src); //UNSIGNED
- void IMUL(int bits, OpArg src); //SIGNED
- void IMUL(int bits, X64Reg regOp, OpArg src);
- void IMUL(int bits, X64Reg regOp, OpArg src, OpArg imm);
- void DIV(int bits, OpArg src);
- void IDIV(int bits, OpArg src);
+ void MUL(int bits, const OpArg& src); //UNSIGNED
+ void IMUL(int bits, const OpArg& src); //SIGNED
+ void IMUL(int bits, X64Reg regOp, const OpArg& src);
+ void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm);
+ void DIV(int bits, const OpArg& src);
+ void IDIV(int bits, const OpArg& src);
// Shift
- void ROL(int bits, OpArg dest, OpArg shift);
- void ROR(int bits, OpArg dest, OpArg shift);
- void RCL(int bits, OpArg dest, OpArg shift);
- void RCR(int bits, OpArg dest, OpArg shift);
- void SHL(int bits, OpArg dest, OpArg shift);
- void SHR(int bits, OpArg dest, OpArg shift);
- void SAR(int bits, OpArg dest, OpArg shift);
+ void ROL(int bits, const OpArg& dest, const OpArg& shift);
+ void ROR(int bits, const OpArg& dest, const OpArg& shift);
+ void RCL(int bits, const OpArg& dest, const OpArg& shift);
+ void RCR(int bits, const OpArg& dest, const OpArg& shift);
+ void SHL(int bits, const OpArg& dest, const OpArg& shift);
+ void SHR(int bits, const OpArg& dest, const OpArg& shift);
+ void SAR(int bits, const OpArg& dest, const OpArg& shift);
// Bit Test
- void BT(int bits, OpArg dest, OpArg index);
- void BTS(int bits, OpArg dest, OpArg index);
- void BTR(int bits, OpArg dest, OpArg index);
- void BTC(int bits, OpArg dest, OpArg index);
+ void BT(int bits, const OpArg& dest, const OpArg& index);
+ void BTS(int bits, const OpArg& dest, const OpArg& index);
+ void BTR(int bits, const OpArg& dest, const OpArg& index);
+ void BTC(int bits, const OpArg& dest, const OpArg& index);
// Double-Precision Shift
- void SHRD(int bits, OpArg dest, OpArg src, OpArg shift);
- void SHLD(int bits, OpArg dest, OpArg src, OpArg shift);
+ void SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
+ void SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift);
// Extend EAX into EDX in various ways
void CWD(int bits = 16);
- inline void CDQ() {CWD(32);}
- inline void CQO() {CWD(64);}
+ void CDQ() {CWD(32);}
+ void CQO() {CWD(64);}
void CBW(int bits = 8);
- inline void CWDE() {CBW(16);}
- inline void CDQE() {CBW(32);}
+ void CWDE() {CBW(16);}
+ void CDQE() {CBW(32);}
// Load effective address
void LEA(int bits, X64Reg dest, OpArg src);
// Integer arithmetic
- void NEG (int bits, OpArg src);
- void ADD (int bits, const OpArg &a1, const OpArg &a2);
- void ADC (int bits, const OpArg &a1, const OpArg &a2);
- void SUB (int bits, const OpArg &a1, const OpArg &a2);
- void SBB (int bits, const OpArg &a1, const OpArg &a2);
- void AND (int bits, const OpArg &a1, const OpArg &a2);
- void CMP (int bits, const OpArg &a1, const OpArg &a2);
+ void NEG(int bits, const OpArg& src);
+ void ADD(int bits, const OpArg& a1, const OpArg& a2);
+ void ADC(int bits, const OpArg& a1, const OpArg& a2);
+ void SUB(int bits, const OpArg& a1, const OpArg& a2);
+ void SBB(int bits, const OpArg& a1, const OpArg& a2);
+ void AND(int bits, const OpArg& a1, const OpArg& a2);
+ void CMP(int bits, const OpArg& a1, const OpArg& a2);
// Bit operations
- void NOT (int bits, OpArg src);
- void OR (int bits, const OpArg &a1, const OpArg &a2);
- void XOR (int bits, const OpArg &a1, const OpArg &a2);
- void MOV (int bits, const OpArg &a1, const OpArg &a2);
- void TEST(int bits, const OpArg &a1, const OpArg &a2);
+ void NOT (int bits, const OpArg& src);
+ void OR(int bits, const OpArg& a1, const OpArg& a2);
+ void XOR(int bits, const OpArg& a1, const OpArg& a2);
+ void MOV(int bits, const OpArg& a1, const OpArg& a2);
+ void TEST(int bits, const OpArg& a1, const OpArg& a2);
// Are these useful at all? Consider removing.
- void XCHG(int bits, const OpArg &a1, const OpArg &a2);
+ void XCHG(int bits, const OpArg& a1, const OpArg& a2);
void XCHG_AHAL();
// Byte swapping (32 and 64-bit only).
@@ -536,13 +532,13 @@ public:
void MOVBE(int dbits, const OpArg& dest, const OpArg& src);
// Available only on AMD >= Phenom or Intel >= Haswell
- void LZCNT(int bits, X64Reg dest, OpArg src);
+ void LZCNT(int bits, X64Reg dest, const OpArg& src);
// Note: this one is actually part of BMI1
- void TZCNT(int bits, X64Reg dest, OpArg src);
+ void TZCNT(int bits, X64Reg dest, const OpArg& src);
// WARNING - These two take 11-13 cycles and are VectorPath! (AMD64)
- void STMXCSR(OpArg memloc);
- void LDMXCSR(OpArg memloc);
+ void STMXCSR(const OpArg& memloc);
+ void LDMXCSR(const OpArg& memloc);
// Prefixes
void LOCK();
@@ -569,259 +565,243 @@ public:
x87_FPUBusy = 0x8000,
};
- void FLD(int bits, OpArg src);
- void FST(int bits, OpArg dest);
- void FSTP(int bits, OpArg dest);
+ void FLD(int bits, const OpArg& src);
+ void FST(int bits, const OpArg& dest);
+ void FSTP(int bits, const OpArg& dest);
void FNSTSW_AX();
void FWAIT();
// SSE/SSE2: Floating point arithmetic
- void ADDSS(X64Reg regOp, OpArg arg);
- void ADDSD(X64Reg regOp, OpArg arg);
- void SUBSS(X64Reg regOp, OpArg arg);
- void SUBSD(X64Reg regOp, OpArg arg);
- void MULSS(X64Reg regOp, OpArg arg);
- void MULSD(X64Reg regOp, OpArg arg);
- void DIVSS(X64Reg regOp, OpArg arg);
- void DIVSD(X64Reg regOp, OpArg arg);
- void MINSS(X64Reg regOp, OpArg arg);
- void MINSD(X64Reg regOp, OpArg arg);
- void MAXSS(X64Reg regOp, OpArg arg);
- void MAXSD(X64Reg regOp, OpArg arg);
- void SQRTSS(X64Reg regOp, OpArg arg);
- void SQRTSD(X64Reg regOp, OpArg arg);
- void RSQRTSS(X64Reg regOp, OpArg arg);
+ void ADDSS(X64Reg regOp, const OpArg& arg);
+ void ADDSD(X64Reg regOp, const OpArg& arg);
+ void SUBSS(X64Reg regOp, const OpArg& arg);
+ void SUBSD(X64Reg regOp, const OpArg& arg);
+ void MULSS(X64Reg regOp, const OpArg& arg);
+ void MULSD(X64Reg regOp, const OpArg& arg);
+ void DIVSS(X64Reg regOp, const OpArg& arg);
+ void DIVSD(X64Reg regOp, const OpArg& arg);
+ void MINSS(X64Reg regOp, const OpArg& arg);
+ void MINSD(X64Reg regOp, const OpArg& arg);
+ void MAXSS(X64Reg regOp, const OpArg& arg);
+ void MAXSD(X64Reg regOp, const OpArg& arg);
+ void SQRTSS(X64Reg regOp, const OpArg& arg);
+ void SQRTSD(X64Reg regOp, const OpArg& arg);
+ void RCPSS(X64Reg regOp, const OpArg& arg);
+ void RSQRTSS(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Floating point bitwise (yes)
- void CMPSS(X64Reg regOp, OpArg arg, u8 compare);
- void CMPSD(X64Reg regOp, OpArg arg, u8 compare);
+ void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare);
+ void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare);
- inline void CMPEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_EQ); }
- inline void CMPLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LT); }
- inline void CMPLESS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_LE); }
- inline void CMPUNORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_UNORD); }
- inline void CMPNEQSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NEQ); }
- inline void CMPNLTSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_NLT); }
- inline void CMPORDSS(X64Reg regOp, OpArg arg) { CMPSS(regOp, arg, CMP_ORD); }
+ void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); }
+ void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); }
+ void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); }
+ void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); }
+ void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); }
+ void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); }
+ void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); }
// SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double)
- void ADDPS(X64Reg regOp, OpArg arg);
- void ADDPD(X64Reg regOp, OpArg arg);
- void SUBPS(X64Reg regOp, OpArg arg);
- void SUBPD(X64Reg regOp, OpArg arg);
- void CMPPS(X64Reg regOp, OpArg arg, u8 compare);
- void CMPPD(X64Reg regOp, OpArg arg, u8 compare);
- void MULPS(X64Reg regOp, OpArg arg);
- void MULPD(X64Reg regOp, OpArg arg);
- void DIVPS(X64Reg regOp, OpArg arg);
- void DIVPD(X64Reg regOp, OpArg arg);
- void MINPS(X64Reg regOp, OpArg arg);
- void MINPD(X64Reg regOp, OpArg arg);
- void MAXPS(X64Reg regOp, OpArg arg);
- void MAXPD(X64Reg regOp, OpArg arg);
- void SQRTPS(X64Reg regOp, OpArg arg);
- void SQRTPD(X64Reg regOp, OpArg arg);
- void RCPPS(X64Reg regOp, OpArg arg);
- void RSQRTPS(X64Reg regOp, OpArg arg);
+ void ADDPS(X64Reg regOp, const OpArg& arg);
+ void ADDPD(X64Reg regOp, const OpArg& arg);
+ void SUBPS(X64Reg regOp, const OpArg& arg);
+ void SUBPD(X64Reg regOp, const OpArg& arg);
+ void CMPPS(X64Reg regOp, const OpArg& arg, u8 compare);
+ void CMPPD(X64Reg regOp, const OpArg& arg, u8 compare);
+ void MULPS(X64Reg regOp, const OpArg& arg);
+ void MULPD(X64Reg regOp, const OpArg& arg);
+ void DIVPS(X64Reg regOp, const OpArg& arg);
+ void DIVPD(X64Reg regOp, const OpArg& arg);
+ void MINPS(X64Reg regOp, const OpArg& arg);
+ void MINPD(X64Reg regOp, const OpArg& arg);
+ void MAXPS(X64Reg regOp, const OpArg& arg);
+ void MAXPD(X64Reg regOp, const OpArg& arg);
+ void SQRTPS(X64Reg regOp, const OpArg& arg);
+ void SQRTPD(X64Reg regOp, const OpArg& arg);
+ void RCPPS(X64Reg regOp, const OpArg& arg);
+ void RSQRTPS(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Floating point packed bitwise (x4 for float, x2 for double)
- void ANDPS(X64Reg regOp, OpArg arg);
- void ANDPD(X64Reg regOp, OpArg arg);
- void ANDNPS(X64Reg regOp, OpArg arg);
- void ANDNPD(X64Reg regOp, OpArg arg);
- void ORPS(X64Reg regOp, OpArg arg);
- void ORPD(X64Reg regOp, OpArg arg);
- void XORPS(X64Reg regOp, OpArg arg);
- void XORPD(X64Reg regOp, OpArg arg);
+ void ANDPS(X64Reg regOp, const OpArg& arg);
+ void ANDPD(X64Reg regOp, const OpArg& arg);
+ void ANDNPS(X64Reg regOp, const OpArg& arg);
+ void ANDNPD(X64Reg regOp, const OpArg& arg);
+ void ORPS(X64Reg regOp, const OpArg& arg);
+ void ORPD(X64Reg regOp, const OpArg& arg);
+ void XORPS(X64Reg regOp, const OpArg& arg);
+ void XORPD(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Shuffle components. These are tricky - see Intel documentation.
- void SHUFPS(X64Reg regOp, OpArg arg, u8 shuffle);
- void SHUFPD(X64Reg regOp, OpArg arg, u8 shuffle);
+ void SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle);
+ void SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle);
// SSE/SSE2: Useful alternative to shuffle in some cases.
- void MOVDDUP(X64Reg regOp, OpArg arg);
-
- // TODO: Actually implement
-#if 0
- // SSE3: Horizontal operations in SIMD registers. Could be useful for various VFPU things like dot products...
- void ADDSUBPS(X64Reg dest, OpArg src);
- void ADDSUBPD(X64Reg dest, OpArg src);
- void HADDPD(X64Reg dest, OpArg src);
- void HSUBPS(X64Reg dest, OpArg src);
- void HSUBPD(X64Reg dest, OpArg src);
-
- // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
- void DPPD(X64Reg dest, OpArg src, u8 arg);
-
- // These are probably useful for VFPU emulation.
- void INSERTPS(X64Reg dest, OpArg src, u8 arg);
- void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
-#endif
+ void MOVDDUP(X64Reg regOp, const OpArg& arg);
// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
- void HADDPS(X64Reg dest, OpArg src);
+ void HADDPS(X64Reg dest, const OpArg& src);
// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
- void DPPS(X64Reg dest, OpArg src, u8 arg);
+ void DPPS(X64Reg dest, const OpArg& src, u8 arg);
- void UNPCKLPS(X64Reg dest, OpArg src);
- void UNPCKHPS(X64Reg dest, OpArg src);
- void UNPCKLPD(X64Reg dest, OpArg src);
- void UNPCKHPD(X64Reg dest, OpArg src);
+ void UNPCKLPS(X64Reg dest, const OpArg& src);
+ void UNPCKHPS(X64Reg dest, const OpArg& src);
+ void UNPCKLPD(X64Reg dest, const OpArg& src);
+ void UNPCKHPD(X64Reg dest, const OpArg& src);
// SSE/SSE2: Compares.
- void COMISS(X64Reg regOp, OpArg arg);
- void COMISD(X64Reg regOp, OpArg arg);
- void UCOMISS(X64Reg regOp, OpArg arg);
- void UCOMISD(X64Reg regOp, OpArg arg);
+ void COMISS(X64Reg regOp, const OpArg& arg);
+ void COMISD(X64Reg regOp, const OpArg& arg);
+ void UCOMISS(X64Reg regOp, const OpArg& arg);
+ void UCOMISD(X64Reg regOp, const OpArg& arg);
// SSE/SSE2: Moves. Use the right data type for your data, in most cases.
- void MOVAPS(X64Reg regOp, OpArg arg);
- void MOVAPD(X64Reg regOp, OpArg arg);
- void MOVAPS(OpArg arg, X64Reg regOp);
- void MOVAPD(OpArg arg, X64Reg regOp);
-
- void MOVUPS(X64Reg regOp, OpArg arg);
- void MOVUPD(X64Reg regOp, OpArg arg);
- void MOVUPS(OpArg arg, X64Reg regOp);
- void MOVUPD(OpArg arg, X64Reg regOp);
-
- void MOVDQA(X64Reg regOp, OpArg arg);
- void MOVDQA(OpArg arg, X64Reg regOp);
- void MOVDQU(X64Reg regOp, OpArg arg);
- void MOVDQU(OpArg arg, X64Reg regOp);
-
- void MOVSS(X64Reg regOp, OpArg arg);
- void MOVSD(X64Reg regOp, OpArg arg);
- void MOVSS(OpArg arg, X64Reg regOp);
- void MOVSD(OpArg arg, X64Reg regOp);
-
- void MOVLPS(X64Reg regOp, OpArg arg);
- void MOVLPD(X64Reg regOp, OpArg arg);
- void MOVLPS(OpArg arg, X64Reg regOp);
- void MOVLPD(OpArg arg, X64Reg regOp);
-
- void MOVHPS(X64Reg regOp, OpArg arg);
- void MOVHPD(X64Reg regOp, OpArg arg);
- void MOVHPS(OpArg arg, X64Reg regOp);
- void MOVHPD(OpArg arg, X64Reg regOp);
+ void MOVAPS(X64Reg regOp, const OpArg& arg);
+ void MOVAPD(X64Reg regOp, const OpArg& arg);
+ void MOVAPS(const OpArg& arg, X64Reg regOp);
+ void MOVAPD(const OpArg& arg, X64Reg regOp);
+
+ void MOVUPS(X64Reg regOp, const OpArg& arg);
+ void MOVUPD(X64Reg regOp, const OpArg& arg);
+ void MOVUPS(const OpArg& arg, X64Reg regOp);
+ void MOVUPD(const OpArg& arg, X64Reg regOp);
+
+ void MOVDQA(X64Reg regOp, const OpArg& arg);
+ void MOVDQA(const OpArg& arg, X64Reg regOp);
+ void MOVDQU(X64Reg regOp, const OpArg& arg);
+ void MOVDQU(const OpArg& arg, X64Reg regOp);
+
+ void MOVSS(X64Reg regOp, const OpArg& arg);
+ void MOVSD(X64Reg regOp, const OpArg& arg);
+ void MOVSS(const OpArg& arg, X64Reg regOp);
+ void MOVSD(const OpArg& arg, X64Reg regOp);
+
+ void MOVLPS(X64Reg regOp, const OpArg& arg);
+ void MOVLPD(X64Reg regOp, const OpArg& arg);
+ void MOVLPS(const OpArg& arg, X64Reg regOp);
+ void MOVLPD(const OpArg& arg, X64Reg regOp);
+
+ void MOVHPS(X64Reg regOp, const OpArg& arg);
+ void MOVHPD(X64Reg regOp, const OpArg& arg);
+ void MOVHPS(const OpArg& arg, X64Reg regOp);
+ void MOVHPD(const OpArg& arg, X64Reg regOp);
void MOVHLPS(X64Reg regOp1, X64Reg regOp2);
void MOVLHPS(X64Reg regOp1, X64Reg regOp2);
- void MOVD_xmm(X64Reg dest, const OpArg &arg);
+ void MOVD_xmm(X64Reg dest, const OpArg& arg);
void MOVQ_xmm(X64Reg dest, OpArg arg);
- void MOVD_xmm(const OpArg &arg, X64Reg src);
+ void MOVD_xmm(const OpArg& arg, X64Reg src);
void MOVQ_xmm(OpArg arg, X64Reg src);
// SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question.
- void MOVMSKPS(X64Reg dest, OpArg arg);
- void MOVMSKPD(X64Reg dest, OpArg arg);
+ void MOVMSKPS(X64Reg dest, const OpArg& arg);
+ void MOVMSKPD(X64Reg dest, const OpArg& arg);
// SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one.
void MASKMOVDQU(X64Reg dest, X64Reg src);
- void LDDQU(X64Reg dest, OpArg src);
+ void LDDQU(X64Reg dest, const OpArg& src);
// SSE/SSE2: Data type conversions.
- void CVTPS2PD(X64Reg dest, OpArg src);
- void CVTPD2PS(X64Reg dest, OpArg src);
- void CVTSS2SD(X64Reg dest, OpArg src);
- void CVTSI2SS(X64Reg dest, OpArg src);
- void CVTSD2SS(X64Reg dest, OpArg src);
- void CVTSI2SD(X64Reg dest, OpArg src);
- void CVTDQ2PD(X64Reg regOp, OpArg arg);
- void CVTPD2DQ(X64Reg regOp, OpArg arg);
- void CVTDQ2PS(X64Reg regOp, OpArg arg);
- void CVTPS2DQ(X64Reg regOp, OpArg arg);
-
- void CVTTPS2DQ(X64Reg regOp, OpArg arg);
- void CVTTPD2DQ(X64Reg regOp, OpArg arg);
+ void CVTPS2PD(X64Reg dest, const OpArg& src);
+ void CVTPD2PS(X64Reg dest, const OpArg& src);
+ void CVTSS2SD(X64Reg dest, const OpArg& src);
+ void CVTSI2SS(X64Reg dest, const OpArg& src);
+ void CVTSD2SS(X64Reg dest, const OpArg& src);
+ void CVTSI2SD(X64Reg dest, const OpArg& src);
+ void CVTDQ2PD(X64Reg regOp, const OpArg& arg);
+ void CVTPD2DQ(X64Reg regOp, const OpArg& arg);
+ void CVTDQ2PS(X64Reg regOp, const OpArg& arg);
+ void CVTPS2DQ(X64Reg regOp, const OpArg& arg);
+
+ void CVTTPS2DQ(X64Reg regOp, const OpArg& arg);
+ void CVTTPD2DQ(X64Reg regOp, const OpArg& arg);
// Destinations are X64 regs (rax, rbx, ...) for these instructions.
- void CVTSS2SI(X64Reg xregdest, OpArg src);
- void CVTSD2SI(X64Reg xregdest, OpArg src);
- void CVTTSS2SI(X64Reg xregdest, OpArg arg);
- void CVTTSD2SI(X64Reg xregdest, OpArg arg);
+ void CVTSS2SI(X64Reg xregdest, const OpArg& src);
+ void CVTSD2SI(X64Reg xregdest, const OpArg& src);
+ void CVTTSS2SI(X64Reg xregdest, const OpArg& arg);
+ void CVTTSD2SI(X64Reg xregdest, const OpArg& arg);
// SSE2: Packed integer instructions
- void PACKSSDW(X64Reg dest, OpArg arg);
- void PACKSSWB(X64Reg dest, OpArg arg);
- void PACKUSDW(X64Reg dest, OpArg arg);
- void PACKUSWB(X64Reg dest, OpArg arg);
+ void PACKSSDW(X64Reg dest, const OpArg& arg);
+ void PACKSSWB(X64Reg dest, const OpArg& arg);
+ void PACKUSDW(X64Reg dest, const OpArg& arg);
+ void PACKUSWB(X64Reg dest, const OpArg& arg);
void PUNPCKLBW(X64Reg dest, const OpArg &arg);
void PUNPCKLWD(X64Reg dest, const OpArg &arg);
void PUNPCKLDQ(X64Reg dest, const OpArg &arg);
void PUNPCKLQDQ(X64Reg dest, const OpArg &arg);
- void PTEST(X64Reg dest, OpArg arg);
- void PAND(X64Reg dest, OpArg arg);
- void PANDN(X64Reg dest, OpArg arg);
- void PXOR(X64Reg dest, OpArg arg);
- void POR(X64Reg dest, OpArg arg);
-
- void PADDB(X64Reg dest, OpArg arg);
- void PADDW(X64Reg dest, OpArg arg);
- void PADDD(X64Reg dest, OpArg arg);
- void PADDQ(X64Reg dest, OpArg arg);
-
- void PADDSB(X64Reg dest, OpArg arg);
- void PADDSW(X64Reg dest, OpArg arg);
- void PADDUSB(X64Reg dest, OpArg arg);
- void PADDUSW(X64Reg dest, OpArg arg);
-
- void PSUBB(X64Reg dest, OpArg arg);
- void PSUBW(X64Reg dest, OpArg arg);
- void PSUBD(X64Reg dest, OpArg arg);
- void PSUBQ(X64Reg dest, OpArg arg);
-
- void PSUBSB(X64Reg dest, OpArg arg);
- void PSUBSW(X64Reg dest, OpArg arg);
- void PSUBUSB(X64Reg dest, OpArg arg);
- void PSUBUSW(X64Reg dest, OpArg arg);
-
- void PAVGB(X64Reg dest, OpArg arg);
- void PAVGW(X64Reg dest, OpArg arg);
-
- void PCMPEQB(X64Reg dest, OpArg arg);
- void PCMPEQW(X64Reg dest, OpArg arg);
- void PCMPEQD(X64Reg dest, OpArg arg);
-
- void PCMPGTB(X64Reg dest, OpArg arg);
- void PCMPGTW(X64Reg dest, OpArg arg);
- void PCMPGTD(X64Reg dest, OpArg arg);
-
- void PEXTRW(X64Reg dest, OpArg arg, u8 subreg);
- void PINSRW(X64Reg dest, OpArg arg, u8 subreg);
-
- void PMADDWD(X64Reg dest, OpArg arg);
- void PSADBW(X64Reg dest, OpArg arg);
-
- void PMAXSW(X64Reg dest, OpArg arg);
- void PMAXUB(X64Reg dest, OpArg arg);
- void PMINSW(X64Reg dest, OpArg arg);
- void PMINUB(X64Reg dest, OpArg arg);
+ void PTEST(X64Reg dest, const OpArg& arg);
+ void PAND(X64Reg dest, const OpArg& arg);
+ void PANDN(X64Reg dest, const OpArg& arg);
+ void PXOR(X64Reg dest, const OpArg& arg);
+ void POR(X64Reg dest, const OpArg& arg);
+
+ void PADDB(X64Reg dest, const OpArg& arg);
+ void PADDW(X64Reg dest, const OpArg& arg);
+ void PADDD(X64Reg dest, const OpArg& arg);
+ void PADDQ(X64Reg dest, const OpArg& arg);
+
+ void PADDSB(X64Reg dest, const OpArg& arg);
+ void PADDSW(X64Reg dest, const OpArg& arg);
+ void PADDUSB(X64Reg dest, const OpArg& arg);
+ void PADDUSW(X64Reg dest, const OpArg& arg);
+
+ void PSUBB(X64Reg dest, const OpArg& arg);
+ void PSUBW(X64Reg dest, const OpArg& arg);
+ void PSUBD(X64Reg dest, const OpArg& arg);
+ void PSUBQ(X64Reg dest, const OpArg& arg);
+
+ void PSUBSB(X64Reg dest, const OpArg& arg);
+ void PSUBSW(X64Reg dest, const OpArg& arg);
+ void PSUBUSB(X64Reg dest, const OpArg& arg);
+ void PSUBUSW(X64Reg dest, const OpArg& arg);
+
+ void PAVGB(X64Reg dest, const OpArg& arg);
+ void PAVGW(X64Reg dest, const OpArg& arg);
+
+ void PCMPEQB(X64Reg dest, const OpArg& arg);
+ void PCMPEQW(X64Reg dest, const OpArg& arg);
+ void PCMPEQD(X64Reg dest, const OpArg& arg);
+
+ void PCMPGTB(X64Reg dest, const OpArg& arg);
+ void PCMPGTW(X64Reg dest, const OpArg& arg);
+ void PCMPGTD(X64Reg dest, const OpArg& arg);
+
+ void PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg);
+ void PINSRW(X64Reg dest, const OpArg& arg, u8 subreg);
+
+ void PMADDWD(X64Reg dest, const OpArg& arg);
+ void PSADBW(X64Reg dest, const OpArg& arg);
+
+ void PMAXSW(X64Reg dest, const OpArg& arg);
+ void PMAXUB(X64Reg dest, const OpArg& arg);
+ void PMINSW(X64Reg dest, const OpArg& arg);
+ void PMINUB(X64Reg dest, const OpArg& arg);
// SSE4: More MAX/MIN instructions.
- void PMINSB(X64Reg dest, OpArg arg);
- void PMINSD(X64Reg dest, OpArg arg);
- void PMINUW(X64Reg dest, OpArg arg);
- void PMINUD(X64Reg dest, OpArg arg);
- void PMAXSB(X64Reg dest, OpArg arg);
- void PMAXSD(X64Reg dest, OpArg arg);
- void PMAXUW(X64Reg dest, OpArg arg);
- void PMAXUD(X64Reg dest, OpArg arg);
-
- void PMOVMSKB(X64Reg dest, OpArg arg);
- void PSHUFD(X64Reg dest, OpArg arg, u8 shuffle);
- void PSHUFB(X64Reg dest, OpArg arg);
-
- void PSHUFLW(X64Reg dest, OpArg arg, u8 shuffle);
- void PSHUFHW(X64Reg dest, OpArg arg, u8 shuffle);
+ void PMINSB(X64Reg dest, const OpArg& arg);
+ void PMINSD(X64Reg dest, const OpArg& arg);
+ void PMINUW(X64Reg dest, const OpArg& arg);
+ void PMINUD(X64Reg dest, const OpArg& arg);
+ void PMAXSB(X64Reg dest, const OpArg& arg);
+ void PMAXSD(X64Reg dest, const OpArg& arg);
+ void PMAXUW(X64Reg dest, const OpArg& arg);
+ void PMAXUD(X64Reg dest, const OpArg& arg);
+
+ void PMOVMSKB(X64Reg dest, const OpArg& arg);
+ void PSHUFD(X64Reg dest, const OpArg& arg, u8 shuffle);
+ void PSHUFB(X64Reg dest, const OpArg& arg);
+
+ void PSHUFLW(X64Reg dest, const OpArg& arg, u8 shuffle);
+ void PSHUFHW(X64Reg dest, const OpArg& arg, u8 shuffle);
void PSRLW(X64Reg reg, int shift);
void PSRLD(X64Reg reg, int shift);
void PSRLQ(X64Reg reg, int shift);
- void PSRLQ(X64Reg reg, OpArg arg);
+ void PSRLQ(X64Reg reg, const OpArg& arg);
void PSRLDQ(X64Reg reg, int shift);
void PSLLW(X64Reg reg, int shift);
@@ -833,198 +813,198 @@ public:
void PSRAD(X64Reg reg, int shift);
// SSE4: data type conversions
- void PMOVSXBW(X64Reg dest, OpArg arg);
- void PMOVSXBD(X64Reg dest, OpArg arg);
- void PMOVSXBQ(X64Reg dest, OpArg arg);
- void PMOVSXWD(X64Reg dest, OpArg arg);
- void PMOVSXWQ(X64Reg dest, OpArg arg);
- void PMOVSXDQ(X64Reg dest, OpArg arg);
- void PMOVZXBW(X64Reg dest, OpArg arg);
- void PMOVZXBD(X64Reg dest, OpArg arg);
- void PMOVZXBQ(X64Reg dest, OpArg arg);
- void PMOVZXWD(X64Reg dest, OpArg arg);
- void PMOVZXWQ(X64Reg dest, OpArg arg);
- void PMOVZXDQ(X64Reg dest, OpArg arg);
+ void PMOVSXBW(X64Reg dest, const OpArg& arg);
+ void PMOVSXBD(X64Reg dest, const OpArg& arg);
+ void PMOVSXBQ(X64Reg dest, const OpArg& arg);
+ void PMOVSXWD(X64Reg dest, const OpArg& arg);
+ void PMOVSXWQ(X64Reg dest, const OpArg& arg);
+ void PMOVSXDQ(X64Reg dest, const OpArg& arg);
+ void PMOVZXBW(X64Reg dest, const OpArg& arg);
+ void PMOVZXBD(X64Reg dest, const OpArg& arg);
+ void PMOVZXBQ(X64Reg dest, const OpArg& arg);
+ void PMOVZXWD(X64Reg dest, const OpArg& arg);
+ void PMOVZXWQ(X64Reg dest, const OpArg& arg);
+ void PMOVZXDQ(X64Reg dest, const OpArg& arg);
// SSE4: variable blend instructions (xmm0 implicit argument)
- void PBLENDVB(X64Reg dest, OpArg arg);
- void BLENDVPS(X64Reg dest, OpArg arg);
- void BLENDVPD(X64Reg dest, OpArg arg);
+ void PBLENDVB(X64Reg dest, const OpArg& arg);
+ void BLENDVPS(X64Reg dest, const OpArg& arg);
+ void BLENDVPD(X64Reg dest, const OpArg& arg);
void BLENDPS(X64Reg dest, const OpArg& arg, u8 blend);
void BLENDPD(X64Reg dest, const OpArg& arg, u8 blend);
// SSE4: rounding (see FloatRound for mode or use ROUNDNEARSS, etc. helpers.)
- void ROUNDSS(X64Reg dest, OpArg arg, u8 mode);
- void ROUNDSD(X64Reg dest, OpArg arg, u8 mode);
- void ROUNDPS(X64Reg dest, OpArg arg, u8 mode);
- void ROUNDPD(X64Reg dest, OpArg arg, u8 mode);
-
- inline void ROUNDNEARSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROSS(X64Reg dest, OpArg arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
-
- inline void ROUNDNEARSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROSD(X64Reg dest, OpArg arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
-
- inline void ROUNDNEARPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROPS(X64Reg dest, OpArg arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
-
- inline void ROUNDNEARPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
- inline void ROUNDFLOORPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
- inline void ROUNDCEILPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
- inline void ROUNDZEROPD(X64Reg dest, OpArg arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
+ void ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode);
+ void ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode);
+ void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode);
+ void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode);
+
+ void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); }
+
+ void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); }
+
+ void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); }
+
+ void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); }
+ void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); }
+ void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); }
+ void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); }
// AVX
- void VADDSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSUBSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VMULSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VDIVSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VADDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSUBPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VMULPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VDIVPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VSHUFPD(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 shuffle);
- void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-
- void VANDPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VANDPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VANDNPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VANDNPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VXORPS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VXORPD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
-
- void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VPOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VPXOR(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+ void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle);
+ void VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+
+ void VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+
+ void VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// FMA3
- void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, OpArg arg);
+ void VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
// VEX GPR instructions
- void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
- void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
- void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void BLSR(int bits, X64Reg regOp, OpArg arg);
- void BLSMSK(int bits, X64Reg regOp, OpArg arg);
- void BLSI(int bits, X64Reg regOp, OpArg arg);
- void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
- void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
+ void SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate);
+ void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
+ void BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void BLSR(int bits, X64Reg regOp, const OpArg& arg);
+ void BLSMSK(int bits, X64Reg regOp, const OpArg& arg);
+ void BLSI(int bits, X64Reg regOp, const OpArg& arg);
+ void BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2);
+ void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg);
void RDTSC();
// Utility functions
// The difference between this and CALL is that this aligns the stack
// where appropriate.
- void ABI_CallFunction(const void *func);
+ void ABI_CallFunction(const void* func);
template <typename T>
void ABI_CallFunction(T (*func)()) {
- ABI_CallFunction((const void *)func);
+ ABI_CallFunction((const void*)func);
}
- void ABI_CallFunction(const u8 *func) {
- ABI_CallFunction((const void *)func);
+ void ABI_CallFunction(const u8* func) {
+ ABI_CallFunction((const void*)func);
}
- void ABI_CallFunctionC16(const void *func, u16 param1);
- void ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2);
+ void ABI_CallFunctionC16(const void* func, u16 param1);
+ void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2);
// These only support u32 parameters, but that's enough for a lot of uses.
// These will destroy the 1 or 2 first "parameter regs".
- void ABI_CallFunctionC(const void *func, u32 param1);
- void ABI_CallFunctionCC(const void *func, u32 param1, u32 param2);
- void ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3);
- void ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3);
- void ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4);
- void ABI_CallFunctionP(const void *func, void *param1);
- void ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2);
- void ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3);
- void ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3);
- void ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2);
- void ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3);
- void ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1);
- void ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2);
+ void ABI_CallFunctionC(const void* func, u32 param1);
+ void ABI_CallFunctionCC(const void* func, u32 param1, u32 param2);
+ void ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3);
+ void ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3);
+ void ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, void* param4);
+ void ABI_CallFunctionP(const void* func, void* param1);
+ void ABI_CallFunctionPA(const void* func, void* param1, const OpArg& arg2);
+ void ABI_CallFunctionPAA(const void* func, void* param1, const OpArg& arg2, const OpArg& arg3);
+ void ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3);
+ void ABI_CallFunctionAC(const void* func, const OpArg& arg1, u32 param2);
+ void ABI_CallFunctionACC(const void* func, const OpArg& arg1, u32 param2, u32 param3);
+ void ABI_CallFunctionA(const void* func, const OpArg& arg1);
+ void ABI_CallFunctionAA(const void* func, const OpArg& arg1, const OpArg& arg2);
// Pass a register as a parameter.
- void ABI_CallFunctionR(const void *func, X64Reg reg1);
- void ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2);
+ void ABI_CallFunctionR(const void* func, X64Reg reg1);
+ void ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2);
template <typename Tr, typename T1>
void ABI_CallFunctionC(Tr (*func)(T1), u32 param1) {
- ABI_CallFunctionC((const void *)func, param1);
+ ABI_CallFunctionC((const void*)func, param1);
}
// A function that doesn't have any control over what it will do to regs,
@@ -1048,9 +1028,9 @@ public:
void ABI_EmitEpilogue(int maxCallParams);
#ifdef _M_IX86
- inline int ABI_GetNumXMMRegs() { return 8; }
+ static int ABI_GetNumXMMRegs() { return 8; }
#else
- inline int ABI_GetNumXMMRegs() { return 16; }
+ static int ABI_GetNumXMMRegs() { return 16; }
#endif
}; // class XEmitter
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 6cc60fd58..c17290b9b 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -29,6 +29,7 @@ set(SRCS
hle/kernel/address_arbiter.cpp
hle/kernel/event.cpp
hle/kernel/kernel.cpp
+ hle/kernel/memory.cpp
hle/kernel/mutex.cpp
hle/kernel/process.cpp
hle/kernel/resource_limit.cpp
@@ -115,7 +116,6 @@ set(SRCS
loader/loader.cpp
loader/ncch.cpp
tracer/recorder.cpp
- mem_map.cpp
memory.cpp
settings.cpp
system.cpp
@@ -157,6 +157,7 @@ set(HEADERS
hle/kernel/address_arbiter.h
hle/kernel/event.h
hle/kernel/kernel.h
+ hle/kernel/memory.h
hle/kernel/mutex.h
hle/kernel/process.h
hle/kernel/resource_limit.h
@@ -245,7 +246,6 @@ set(HEADERS
loader/ncch.h
tracer/recorder.h
tracer/citrace.h
- mem_map.h
memory.h
memory_setup.h
settings.h
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index 422e80b50..0fddb07a0 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -9,6 +9,7 @@
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/memory.h"
@@ -48,65 +49,47 @@ enum {
typedef unsigned int (*shtop_fp_t)(ARMul_State* cpu, unsigned int sht_oper);
-static int CondPassed(ARMul_State* cpu, unsigned int cond) {
- const u32 NFLAG = cpu->NFlag;
- const u32 ZFLAG = cpu->ZFlag;
- const u32 CFLAG = cpu->CFlag;
- const u32 VFLAG = cpu->VFlag;
-
- int temp = 0;
+static bool CondPassed(ARMul_State* cpu, unsigned int cond) {
+ const bool n_flag = cpu->NFlag != 0;
+ const bool z_flag = cpu->ZFlag != 0;
+ const bool c_flag = cpu->CFlag != 0;
+ const bool v_flag = cpu->VFlag != 0;
switch (cond) {
- case 0x0:
- temp = ZFLAG;
- break;
- case 0x1: // NE
- temp = !ZFLAG;
- break;
- case 0x2: // CS
- temp = CFLAG;
- break;
- case 0x3: // CC
- temp = !CFLAG;
- break;
- case 0x4: // MI
- temp = NFLAG;
- break;
- case 0x5: // PL
- temp = !NFLAG;
- break;
- case 0x6: // VS
- temp = VFLAG;
- break;
- case 0x7: // VC
- temp = !VFLAG;
- break;
- case 0x8: // HI
- temp = (CFLAG && !ZFLAG);
- break;
- case 0x9: // LS
- temp = (!CFLAG || ZFLAG);
- break;
- case 0xa: // GE
- temp = ((!NFLAG && !VFLAG) || (NFLAG && VFLAG));
- break;
- case 0xb: // LT
- temp = ((NFLAG && !VFLAG) || (!NFLAG && VFLAG));
- break;
- case 0xc: // GT
- temp = ((!NFLAG && !VFLAG && !ZFLAG) || (NFLAG && VFLAG && !ZFLAG));
- break;
- case 0xd: // LE
- temp = ((NFLAG && !VFLAG) || (!NFLAG && VFLAG)) || ZFLAG;
- break;
- case 0xe: // AL
- temp = 1;
- break;
- case 0xf:
- temp = 1;
- break;
- }
- return temp;
+ case ConditionCode::EQ:
+ return z_flag;
+ case ConditionCode::NE:
+ return !z_flag;
+ case ConditionCode::CS:
+ return c_flag;
+ case ConditionCode::CC:
+ return !c_flag;
+ case ConditionCode::MI:
+ return n_flag;
+ case ConditionCode::PL:
+ return !n_flag;
+ case ConditionCode::VS:
+ return v_flag;
+ case ConditionCode::VC:
+ return !v_flag;
+ case ConditionCode::HI:
+ return (c_flag && !z_flag);
+ case ConditionCode::LS:
+ return (!c_flag || z_flag);
+ case ConditionCode::GE:
+ return (n_flag == v_flag);
+ case ConditionCode::LT:
+ return (n_flag != v_flag);
+ case ConditionCode::GT:
+ return (!z_flag && (n_flag == v_flag));
+ case ConditionCode::LE:
+ return (z_flag || (n_flag != v_flag));
+ case ConditionCode::AL:
+ case ConditionCode::NV: // Unconditional
+ return true;
+ }
+
+ return false;
}
static unsigned int DPO(Immediate)(ARMul_State* cpu, unsigned int sht_oper) {
@@ -3522,8 +3505,11 @@ enum {
FETCH_EXCEPTION
};
+MICROPROFILE_DEFINE(DynCom_Decode, "DynCom", "Decode", MP_RGB(255, 64, 64));
+
static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, u32 addr) {
Common::Profiling::ScopeTimer timer_decode(profile_decode);
+ MICROPROFILE_SCOPE(DynCom_Decode);
// Decode instruction, get index
// Allocate memory and init InsCream
@@ -3588,8 +3574,11 @@ static int clz(unsigned int x) {
return n;
}
+MICROPROFILE_DEFINE(DynCom_Execute, "DynCom", "Execute", MP_RGB(255, 0, 0));
+
unsigned InterpreterMainLoop(ARMul_State* cpu) {
Common::Profiling::ScopeTimer timer_execute(profile_execute);
+ MICROPROFILE_SCOPE(DynCom_Execute);
#undef RM
#undef RS
diff --git a/src/core/arm/skyeye_common/armstate.cpp b/src/core/arm/skyeye_common/armstate.cpp
index ccb2eb0eb..0491717dc 100644
--- a/src/core/arm/skyeye_common/armstate.cpp
+++ b/src/core/arm/skyeye_common/armstate.cpp
@@ -4,7 +4,6 @@
#include "common/swap.h"
#include "common/logging/log.h"
-#include "core/mem_map.h"
#include "core/memory.h"
#include "core/arm/skyeye_common/armstate.h"
#include "core/arm/skyeye_common/vfp/vfp.h"
diff --git a/src/core/arm/skyeye_common/armsupp.cpp b/src/core/arm/skyeye_common/armsupp.cpp
index d31fb9449..883713e86 100644
--- a/src/core/arm/skyeye_common/armsupp.cpp
+++ b/src/core/arm/skyeye_common/armsupp.cpp
@@ -17,7 +17,6 @@
#include "common/logging/log.h"
-#include "core/mem_map.h"
#include "core/arm/skyeye_common/arm_regformat.h"
#include "core/arm/skyeye_common/armstate.h"
#include "core/arm/skyeye_common/armsupp.h"
diff --git a/src/core/hle/config_mem.cpp b/src/core/hle/config_mem.cpp
index aea936d2d..b1a72dc0c 100644
--- a/src/core/hle/config_mem.cpp
+++ b/src/core/hle/config_mem.cpp
@@ -25,10 +25,6 @@ void Init() {
config_mem.sys_core_ver = 0x2;
config_mem.unit_info = 0x1; // Bit 0 set for Retail
config_mem.prev_firm = 0;
- config_mem.app_mem_type = 0x2; // Default app mem type is 0
- config_mem.app_mem_alloc = 0x06000000; // Set to 96MB, since some games use more than the default (64MB)
- config_mem.base_mem_alloc = 0x01400000; // Default base memory is 20MB
- config_mem.sys_mem_alloc = Memory::FCRAM_SIZE - (config_mem.app_mem_alloc + config_mem.base_mem_alloc);
config_mem.firm_unk = 0;
config_mem.firm_version_rev = 0;
config_mem.firm_version_min = 0x40;
@@ -36,7 +32,4 @@ void Init() {
config_mem.firm_sys_core_ver = 0x2;
}
-void Shutdown() {
-}
-
} // namespace
diff --git a/src/core/hle/config_mem.h b/src/core/hle/config_mem.h
index 9825a09e8..24a1254f2 100644
--- a/src/core/hle/config_mem.h
+++ b/src/core/hle/config_mem.h
@@ -52,6 +52,5 @@ static_assert(sizeof(ConfigMemDef) == Memory::CONFIG_MEMORY_SIZE, "Config Memory
extern ConfigMemDef config_mem;
void Init();
-void Shutdown();
} // namespace
diff --git a/src/core/hle/function_wrappers.h b/src/core/hle/function_wrappers.h
index 1a0518926..5846a161b 100644
--- a/src/core/hle/function_wrappers.h
+++ b/src/core/hle/function_wrappers.h
@@ -172,6 +172,14 @@ template<ResultCode func(u32, s64, s64)> void Wrap() {
FuncReturn(func(PARAM(0), param1, param2).raw);
}
+template<ResultCode func(s64*, Handle, u32)> void Wrap() {
+ s64 param_1 = 0;
+ u32 retval = func(&param_1, PARAM(1), PARAM(2)).raw;
+ Core::g_app_core->SetReg(1, (u32)param_1);
+ Core::g_app_core->SetReg(2, (u32)(param_1 >> 32));
+ FuncReturn(retval);
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
// Function wrappers that return type u32
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index cd0a400dc..331b1b22a 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -34,8 +34,6 @@ void Reschedule(const char *reason) {
void Init() {
Service::Init();
- ConfigMem::Init();
- SharedPage::Init();
g_reschedule = false;
@@ -43,8 +41,6 @@ void Init() {
}
void Shutdown() {
- ConfigMem::Shutdown();
- SharedPage::Shutdown();
Service::Shutdown();
LOG_DEBUG(Kernel, "shutdown OK");
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 5711c0405..7a401a965 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -7,11 +7,14 @@
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/hle/config_mem.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/resource_limit.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
+#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/timer.h"
+#include "core/hle/shared_page.h"
namespace Kernel {
@@ -119,6 +122,13 @@ void HandleTable::Clear() {
/// Initialize the kernel
void Init() {
+ ConfigMem::Init();
+ SharedPage::Init();
+
+ // TODO(yuriks): The memory type parameter needs to be determined by the ExHeader field instead
+ // For now it defaults to the one with a largest allocation to the app
+ Kernel::MemoryInit(2); // Allocates 96MB to the application
+
Kernel::ResourceLimitsInit();
Kernel::ThreadingInit();
Kernel::TimersInit();
@@ -131,11 +141,14 @@ void Init() {
/// Shutdown the kernel
void Shutdown() {
+ g_handle_table.Clear(); // Free all kernel objects
+
Kernel::ThreadingShutdown();
+ g_current_process = nullptr;
+
Kernel::TimersShutdown();
Kernel::ResourceLimitsShutdown();
- g_handle_table.Clear(); // Free all kernel objects
- g_current_process = nullptr;
+ Kernel::MemoryShutdown();
}
} // namespace
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
new file mode 100644
index 000000000..e4fc5f3c4
--- /dev/null
+++ b/src/core/hle/kernel/memory.cpp
@@ -0,0 +1,136 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <map>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "core/hle/config_mem.h"
+#include "core/hle/kernel/memory.h"
+#include "core/hle/kernel/vm_manager.h"
+#include "core/hle/result.h"
+#include "core/hle/shared_page.h"
+#include "core/memory.h"
+#include "core/memory_setup.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace Kernel {
+
+static MemoryRegionInfo memory_regions[3];
+
+/// Size of the APPLICATION, SYSTEM and BASE memory regions (respectively) for each sytem
+/// memory configuration type.
+static const u32 memory_region_sizes[8][3] = {
+ // Old 3DS layouts
+ {0x04000000, 0x02C00000, 0x01400000}, // 0
+ { /* This appears to be unused. */ }, // 1
+ {0x06000000, 0x00C00000, 0x01400000}, // 2
+ {0x05000000, 0x01C00000, 0x01400000}, // 3
+ {0x04800000, 0x02400000, 0x01400000}, // 4
+ {0x02000000, 0x04C00000, 0x01400000}, // 5
+
+ // New 3DS layouts
+ {0x07C00000, 0x06400000, 0x02000000}, // 6
+ {0x0B200000, 0x02E00000, 0x02000000}, // 7
+};
+
+void MemoryInit(u32 mem_type) {
+ // TODO(yuriks): On the n3DS, all o3DS configurations (<=5) are forced to 6 instead.
+ ASSERT_MSG(mem_type <= 5, "New 3DS memory configuration aren't supported yet!");
+ ASSERT(mem_type != 1);
+
+ // The kernel allocation regions (APPLICATION, SYSTEM and BASE) are laid out in sequence, with
+ // the sizes specified in the memory_region_sizes table.
+ VAddr base = 0;
+ for (int i = 0; i < 3; ++i) {
+ memory_regions[i].base = base;
+ memory_regions[i].size = memory_region_sizes[mem_type][i];
+ memory_regions[i].linear_heap_memory = std::make_shared<std::vector<u8>>();
+
+ base += memory_regions[i].size;
+ }
+
+ // We must've allocated the entire FCRAM by the end
+ ASSERT(base == Memory::FCRAM_SIZE);
+
+ using ConfigMem::config_mem;
+ config_mem.app_mem_type = mem_type;
+ // app_mem_malloc does not always match the configured size for memory_region[0]: in case the
+ // n3DS type override is in effect it reports the size the game expects, not the real one.
+ config_mem.app_mem_alloc = memory_region_sizes[mem_type][0];
+ config_mem.sys_mem_alloc = memory_regions[1].size;
+ config_mem.base_mem_alloc = memory_regions[2].size;
+}
+
+void MemoryShutdown() {
+ for (auto& region : memory_regions) {
+ region.base = 0;
+ region.size = 0;
+ region.linear_heap_memory = nullptr;
+ }
+}
+
+MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) {
+ switch (region) {
+ case MemoryRegion::APPLICATION:
+ return &memory_regions[0];
+ case MemoryRegion::SYSTEM:
+ return &memory_regions[1];
+ case MemoryRegion::BASE:
+ return &memory_regions[2];
+ default:
+ UNREACHABLE();
+ }
+}
+
+}
+
+namespace Memory {
+
+namespace {
+
+struct MemoryArea {
+ u32 base;
+ u32 size;
+ const char* name;
+};
+
+// We don't declare the IO regions in here since its handled by other means.
+static MemoryArea memory_areas[] = {
+ {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
+ {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
+ {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
+ {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
+};
+
+}
+
+void Init() {
+ InitMemoryMap();
+ LOG_DEBUG(HW_Memory, "initialized OK");
+}
+
+void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
+ using namespace Kernel;
+
+ for (MemoryArea& area : memory_areas) {
+ auto block = std::make_shared<std::vector<u8>>(area.size);
+ address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap();
+ }
+
+ auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR,
+ (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom();
+ address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
+
+ auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
+ (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
+ address_space.Reprotect(shared_page_vma, VMAPermission::Read);
+}
+
+} // namespace
diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h
new file mode 100644
index 000000000..36690b091
--- /dev/null
+++ b/src/core/hle/kernel/memory.h
@@ -0,0 +1,35 @@
+// Copyright 2014 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+class VMManager;
+
+struct MemoryRegionInfo {
+ u32 base; // Not an address, but offset from start of FCRAM
+ u32 size;
+
+ std::shared_ptr<std::vector<u8>> linear_heap_memory;
+};
+
+void MemoryInit(u32 mem_type);
+void MemoryShutdown();
+MemoryRegionInfo* GetMemoryRegion(MemoryRegion region);
+
+}
+
+namespace Memory {
+
+void Init();
+void InitLegacyAddressSpace(Kernel::VMManager& address_space);
+
+} // namespace
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index a7892c652..c2b4963d4 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -7,11 +7,11 @@
#include "common/logging/log.h"
#include "common/make_unique.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/kernel/vm_manager.h"
-#include "core/mem_map.h"
#include "core/memory.h"
namespace Kernel {
@@ -36,8 +36,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) {
process->codeset = std::move(code_set);
process->flags.raw = 0;
process->flags.memory_region = MemoryRegion::APPLICATION;
- process->address_space = Common::make_unique<VMManager>();
- Memory::InitLegacyAddressSpace(*process->address_space);
+ Memory::InitLegacyAddressSpace(process->vm_manager);
return process;
}
@@ -93,9 +92,11 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
mapping.unk_flag = false;
} else if ((type & 0xFE0) == 0xFC0) { // 0x01FF
// Kernel version
- int minor = descriptor & 0xFF;
- int major = (descriptor >> 8) & 0xFF;
- LOG_INFO(Loader, "ExHeader kernel version ignored: %d.%d", major, minor);
+ kernel_version = descriptor & 0xFFFF;
+
+ int minor = kernel_version & 0xFF;
+ int major = (kernel_version >> 8) & 0xFF;
+ LOG_INFO(Loader, "ExHeader kernel version: %d.%d", major, minor);
} else {
LOG_ERROR(Loader, "Unhandled kernel caps descriptor: 0x%08X", descriptor);
}
@@ -103,20 +104,161 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
}
void Process::Run(s32 main_thread_priority, u32 stack_size) {
+ memory_region = GetMemoryRegion(flags.memory_region);
+
auto MapSegment = [&](CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) {
- auto vma = address_space->MapMemoryBlock(segment.addr, codeset->memory,
+ auto vma = vm_manager.MapMemoryBlock(segment.addr, codeset->memory,
segment.offset, segment.size, memory_state).Unwrap();
- address_space->Reprotect(vma, permissions);
+ vm_manager.Reprotect(vma, permissions);
+ misc_memory_used += segment.size;
};
+ // Map CodeSet segments
MapSegment(codeset->code, VMAPermission::ReadExecute, MemoryState::Code);
MapSegment(codeset->rodata, VMAPermission::Read, MemoryState::Code);
MapSegment(codeset->data, VMAPermission::ReadWrite, MemoryState::Private);
- address_space->LogLayout();
+ // Allocate and map stack
+ vm_manager.MapMemoryBlock(Memory::HEAP_VADDR_END - stack_size,
+ std::make_shared<std::vector<u8>>(stack_size, 0), 0, stack_size, MemoryState::Locked
+ ).Unwrap();
+ misc_memory_used += stack_size;
+
+ vm_manager.LogLayout(Log::Level::Debug);
Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority);
}
+VAddr Process::GetLinearHeapBase() const {
+ return (kernel_version < 0x22C ? Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_VADDR)
+ + memory_region->base;
+}
+
+VAddr Process::GetLinearHeapLimit() const {
+ return GetLinearHeapBase() + memory_region->size;
+}
+
+ResultVal<VAddr> Process::HeapAllocate(VAddr target, u32 size, VMAPermission perms) {
+ if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) {
+ return ERR_INVALID_ADDRESS;
+ }
+
+ if (heap_memory == nullptr) {
+ // Initialize heap
+ heap_memory = std::make_shared<std::vector<u8>>();
+ heap_start = heap_end = target;
+ }
+
+ // If necessary, expand backing vector to cover new heap extents.
+ if (target < heap_start) {
+ heap_memory->insert(begin(*heap_memory), heap_start - target, 0);
+ heap_start = target;
+ vm_manager.RefreshMemoryBlockMappings(heap_memory.get());
+ }
+ if (target + size > heap_end) {
+ heap_memory->insert(end(*heap_memory), (target + size) - heap_end, 0);
+ heap_end = target + size;
+ vm_manager.RefreshMemoryBlockMappings(heap_memory.get());
+ }
+ ASSERT(heap_end - heap_start == heap_memory->size());
+
+ CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, heap_memory, target - heap_start, size, MemoryState::Private));
+ vm_manager.Reprotect(vma, perms);
+
+ heap_used += size;
+
+ return MakeResult<VAddr>(heap_end - size);
+}
+
+ResultCode Process::HeapFree(VAddr target, u32 size) {
+ if (target < Memory::HEAP_VADDR || target + size > Memory::HEAP_VADDR_END || target + size < target) {
+ return ERR_INVALID_ADDRESS;
+ }
+
+ if (size == 0) {
+ return RESULT_SUCCESS;
+ }
+
+ ResultCode result = vm_manager.UnmapRange(target, size);
+ if (result.IsError()) return result;
+
+ heap_used -= size;
+
+ return RESULT_SUCCESS;
+}
+
+ResultVal<VAddr> Process::LinearAllocate(VAddr target, u32 size, VMAPermission perms) {
+ auto& linheap_memory = memory_region->linear_heap_memory;
+
+ VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size();
+ // Games and homebrew only ever seem to pass 0 here (which lets the kernel decide the address),
+ // but explicit addresses are also accepted and respected.
+ if (target == 0) {
+ target = heap_end;
+ }
+
+ if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() ||
+ target > heap_end || target + size < target) {
+
+ return ERR_INVALID_ADDRESS;
+ }
+
+ // Expansion of the linear heap is only allowed if you do an allocation immediatelly at its
+ // end. It's possible to free gaps in the middle of the heap and then reallocate them later,
+ // but expansions are only allowed at the end.
+ if (target == heap_end) {
+ linheap_memory->insert(linheap_memory->end(), size, 0);
+ vm_manager.RefreshMemoryBlockMappings(linheap_memory.get());
+ }
+
+ // TODO(yuriks): As is, this lets processes map memory allocated by other processes from the
+ // same region. It is unknown if or how the 3DS kernel checks against this.
+ size_t offset = target - GetLinearHeapBase();
+ CASCADE_RESULT(auto vma, vm_manager.MapMemoryBlock(target, linheap_memory, offset, size, MemoryState::Continuous));
+ vm_manager.Reprotect(vma, perms);
+
+ linear_heap_used += size;
+
+ return MakeResult<VAddr>(target);
+}
+
+ResultCode Process::LinearFree(VAddr target, u32 size) {
+ auto& linheap_memory = memory_region->linear_heap_memory;
+
+ if (target < GetLinearHeapBase() || target + size > GetLinearHeapLimit() ||
+ target + size < target) {
+
+ return ERR_INVALID_ADDRESS;
+ }
+
+ if (size == 0) {
+ return RESULT_SUCCESS;
+ }
+
+ VAddr heap_end = GetLinearHeapBase() + (u32)linheap_memory->size();
+ if (target + size > heap_end) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+
+ ResultCode result = vm_manager.UnmapRange(target, size);
+ if (result.IsError()) return result;
+
+ linear_heap_used -= size;
+
+ if (target + size == heap_end) {
+ // End of linear heap has been freed, so check what's the last allocated block in it and
+ // reduce the size.
+ auto vma = vm_manager.FindVMA(target);
+ ASSERT(vma != vm_manager.vma_map.end());
+ ASSERT(vma->second.type == VMAType::Free);
+ VAddr new_end = vma->second.base;
+ if (new_end >= GetLinearHeapBase()) {
+ linheap_memory->resize(new_end - GetLinearHeapBase());
+ }
+ }
+
+ return RESULT_SUCCESS;
+}
+
Kernel::Process::Process() {}
Kernel::Process::~Process() {}
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 83d3aceae..60e17f251 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -15,6 +15,7 @@
#include "common/common_types.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/vm_manager.h"
namespace Kernel {
@@ -48,7 +49,7 @@ union ProcessFlags {
};
class ResourceLimit;
-class VMManager;
+struct MemoryRegionInfo;
struct CodeSet final : public Object {
static SharedPtr<CodeSet> Create(std::string name, u64 program_id);
@@ -104,14 +105,12 @@ public:
/// processes access to specific I/O regions and device memory.
boost::container::static_vector<AddressMapping, 8> address_mappings;
ProcessFlags flags;
+ /// Kernel compatibility version for this process
+ u16 kernel_version = 0;
/// The id of this process
u32 process_id = next_process_id++;
- /// Bitmask of the used TLS slots
- std::bitset<300> used_tls_slots;
- std::unique_ptr<VMManager> address_space;
-
/**
* Parses a list of kernel capability descriptors (as found in the ExHeader) and applies them
* to this process.
@@ -123,6 +122,36 @@ public:
*/
void Run(s32 main_thread_priority, u32 stack_size);
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////
+ // Memory Management
+
+ VMManager vm_manager;
+
+ // Memory used to back the allocations in the regular heap. A single vector is used to cover
+ // the entire virtual address space extents that bound the allocations, including any holes.
+ // This makes deallocation and reallocation of holes fast and keeps process memory contiguous
+ // in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
+ std::shared_ptr<std::vector<u8>> heap_memory;
+ // The left/right bounds of the address space covered by heap_memory.
+ VAddr heap_start = 0, heap_end = 0;
+
+ u32 heap_used = 0, linear_heap_used = 0, misc_memory_used = 0;
+
+ MemoryRegionInfo* memory_region = nullptr;
+
+ /// Bitmask of the used TLS slots
+ std::bitset<300> used_tls_slots;
+
+ VAddr GetLinearHeapBase() const;
+ VAddr GetLinearHeapLimit() const;
+
+ ResultVal<VAddr> HeapAllocate(VAddr target, u32 size, VMAPermission perms);
+ ResultCode HeapFree(VAddr target, u32 size);
+
+ ResultVal<VAddr> LinearAllocate(VAddr target, u32 size, VMAPermission perms);
+ ResultCode LinearFree(VAddr target, u32 size);
+
private:
Process();
~Process() override;
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index 94b3e3298..67dde08c2 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -6,7 +6,6 @@
#include "common/logging/log.h"
-#include "core/mem_map.h"
#include "core/hle/kernel/resource_limit.h"
namespace Kernel {
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp
index 4137683b5..1f477664b 100644
--- a/src/core/hle/kernel/shared_memory.cpp
+++ b/src/core/hle/kernel/shared_memory.cpp
@@ -20,6 +20,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissi
shared_memory->name = std::move(name);
shared_memory->base_address = 0x0;
+ shared_memory->fixed_address = 0x0;
shared_memory->size = size;
shared_memory->permissions = permissions;
shared_memory->other_permissions = other_permissions;
@@ -30,9 +31,31 @@ SharedPtr<SharedMemory> SharedMemory::Create(u32 size, MemoryPermission permissi
ResultCode SharedMemory::Map(VAddr address, MemoryPermission permissions,
MemoryPermission other_permissions) {
+ if (base_address != 0) {
+ LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: already mapped at 0x%08X!",
+ GetObjectId(), address, name.c_str(), base_address);
+ // TODO: Verify error code with hardware
+ return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
+ ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
+ }
+
+ if (fixed_address != 0) {
+ if (address != 0 && address != fixed_address) {
+ LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s: fixed_addres is 0x%08X!",
+ GetObjectId(), address, name.c_str(), fixed_address);
+ // TODO: Verify error code with hardware
+ return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
+ ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
+ }
+
+ // HACK(yuriks): This is only here to support the APT shared font mapping right now.
+ // Later, this should actually map the memory block onto the address space.
+ return RESULT_SUCCESS;
+ }
+
if (address < Memory::SHARED_MEMORY_VADDR || address + size >= Memory::SHARED_MEMORY_VADDR_END) {
- LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X outside of shared mem bounds!",
- GetObjectId(), address);
+ LOG_ERROR(Kernel, "cannot map id=%u, address=0x%08X name=%s outside of shared mem bounds!",
+ GetObjectId(), address, name.c_str());
// TODO: Verify error code with hardware
return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,
ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index 7a2922776..35b550d12 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -61,6 +61,8 @@ public:
/// Address of shared memory block in the process.
VAddr base_address;
+ /// Fixed address to allow mapping to. Used for blocks created from the linear heap.
+ VAddr fixed_address;
/// Size of the memory block. Page-aligned.
u32 size;
/// Permission restrictions applied to the process which created the block.
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 29ea6d531..c10126513 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -117,6 +117,7 @@ void Thread::Stop() {
wait_objects.clear();
Kernel::g_current_process->used_tls_slots[tls_index] = false;
+ g_current_process->misc_memory_used -= Memory::TLS_ENTRY_SIZE;
HLE::Reschedule(__func__);
}
@@ -414,6 +415,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
}
ASSERT_MSG(thread->tls_index != -1, "Out of TLS space");
+ g_current_process->misc_memory_used += Memory::TLS_ENTRY_SIZE;
// TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
// to initialize the context
@@ -504,7 +506,7 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
}
VAddr Thread::GetTLSAddress() const {
- return Memory::TLS_AREA_VADDR + tls_index * 0x200;
+ return Memory::TLS_AREA_VADDR + tls_index * Memory::TLS_ENTRY_SIZE;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 205cc7b53..2610acf76 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -11,6 +11,15 @@
namespace Kernel {
+static const char* GetMemoryStateName(MemoryState state) {
+ static const char* names[] = {
+ "Free", "Reserved", "IO", "Static", "Code", "Private", "Shared", "Continuous", "Aliased",
+ "Alias", "AliasCode", "Locked",
+ };
+
+ return names[(int)state];
+}
+
bool VirtualMemoryArea::CanBeMergedWith(const VirtualMemoryArea& next) const {
ASSERT(base + size == next.base);
if (permissions != next.permissions ||
@@ -51,11 +60,15 @@ void VMManager::Reset() {
}
VMManager::VMAHandle VMManager::FindVMA(VAddr target) const {
- return std::prev(vma_map.upper_bound(target));
+ if (target >= MAX_ADDRESS) {
+ return vma_map.end();
+ } else {
+ return std::prev(vma_map.upper_bound(target));
+ }
}
ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
- std::shared_ptr<std::vector<u8>> block, u32 offset, u32 size, MemoryState state) {
+ std::shared_ptr<std::vector<u8>> block, size_t offset, u32 size, MemoryState state) {
ASSERT(block != nullptr);
ASSERT(offset + size <= block->size());
@@ -106,10 +119,8 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u3
return MakeResult<VMAHandle>(MergeAdjacent(vma_handle));
}
-void VMManager::Unmap(VMAHandle vma_handle) {
- VMAIter iter = StripIterConstness(vma_handle);
-
- VirtualMemoryArea& vma = iter->second;
+VMManager::VMAIter VMManager::Unmap(VMAIter vma_handle) {
+ VirtualMemoryArea& vma = vma_handle->second;
vma.type = VMAType::Free;
vma.permissions = VMAPermission::None;
vma.meminfo_state = MemoryState::Free;
@@ -121,26 +132,67 @@ void VMManager::Unmap(VMAHandle vma_handle) {
UpdatePageTableForVMA(vma);
- MergeAdjacent(iter);
+ return MergeAdjacent(vma_handle);
+}
+
+ResultCode VMManager::UnmapRange(VAddr target, u32 size) {
+ CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size));
+ VAddr target_end = target + size;
+
+ VMAIter end = vma_map.end();
+ // The comparison against the end of the range must be done using addresses since VMAs can be
+ // merged during this process, causing invalidation of the iterators.
+ while (vma != end && vma->second.base < target_end) {
+ vma = std::next(Unmap(vma));
+ }
+
+ ASSERT(FindVMA(target)->second.size >= size);
+ return RESULT_SUCCESS;
}
-void VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) {
+VMManager::VMAHandle VMManager::Reprotect(VMAHandle vma_handle, VMAPermission new_perms) {
VMAIter iter = StripIterConstness(vma_handle);
VirtualMemoryArea& vma = iter->second;
vma.permissions = new_perms;
UpdatePageTableForVMA(vma);
- MergeAdjacent(iter);
+ return MergeAdjacent(iter);
+}
+
+ResultCode VMManager::ReprotectRange(VAddr target, u32 size, VMAPermission new_perms) {
+ CASCADE_RESULT(VMAIter vma, CarveVMARange(target, size));
+ VAddr target_end = target + size;
+
+ VMAIter end = vma_map.end();
+ // The comparison against the end of the range must be done using addresses since VMAs can be
+ // merged during this process, causing invalidation of the iterators.
+ while (vma != end && vma->second.base < target_end) {
+ vma = std::next(StripIterConstness(Reprotect(vma, new_perms)));
+ }
+
+ return RESULT_SUCCESS;
}
-void VMManager::LogLayout() const {
+void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
+ // If this ever proves to have a noticeable performance impact, allow users of the function to
+ // specify a specific range of addresses to limit the scan to.
for (const auto& p : vma_map) {
const VirtualMemoryArea& vma = p.second;
- LOG_DEBUG(Kernel, "%08X - %08X size: %8X %c%c%c", vma.base, vma.base + vma.size, vma.size,
+ if (block == vma.backing_block.get()) {
+ UpdatePageTableForVMA(vma);
+ }
+ }
+}
+
+void VMManager::LogLayout(Log::Level log_level) const {
+ for (const auto& p : vma_map) {
+ const VirtualMemoryArea& vma = p.second;
+ LOG_GENERIC(Log::Class::Kernel, log_level, "%08X - %08X size: %8X %c%c%c %s",
+ vma.base, vma.base + vma.size, vma.size,
(u8)vma.permissions & (u8)VMAPermission::Read ? 'R' : '-',
(u8)vma.permissions & (u8)VMAPermission::Write ? 'W' : '-',
- (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-');
+ (u8)vma.permissions & (u8)VMAPermission::Execute ? 'X' : '-', GetMemoryStateName(vma.meminfo_state));
}
}
@@ -151,21 +203,19 @@ VMManager::VMAIter VMManager::StripIterConstness(const VMAHandle & iter) {
}
ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
- ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: %8X", size);
- ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: %08X", base);
+ ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size);
+ ASSERT_MSG((base & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", base);
VMAIter vma_handle = StripIterConstness(FindVMA(base));
if (vma_handle == vma_map.end()) {
// Target address is outside the range managed by the kernel
- return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
- ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E01BF5
+ return ERR_INVALID_ADDRESS;
}
VirtualMemoryArea& vma = vma_handle->second;
if (vma.type != VMAType::Free) {
// Region is already allocated
- return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
- ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5
+ return ERR_INVALID_ADDRESS_STATE;
}
u32 start_in_vma = base - vma.base;
@@ -173,8 +223,7 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
if (end_in_vma > vma.size) {
// Requested allocation doesn't fit inside VMA
- return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS,
- ErrorSummary::InvalidState, ErrorLevel::Usage); // 0xE0A01BF5
+ return ERR_INVALID_ADDRESS_STATE;
}
if (end_in_vma != vma.size) {
@@ -189,6 +238,35 @@ ResultVal<VMManager::VMAIter> VMManager::CarveVMA(VAddr base, u32 size) {
return MakeResult<VMAIter>(vma_handle);
}
+ResultVal<VMManager::VMAIter> VMManager::CarveVMARange(VAddr target, u32 size) {
+ ASSERT_MSG((size & Memory::PAGE_MASK) == 0, "non-page aligned size: 0x%8X", size);
+ ASSERT_MSG((target & Memory::PAGE_MASK) == 0, "non-page aligned base: 0x%08X", target);
+
+ VAddr target_end = target + size;
+ ASSERT(target_end >= target);
+ ASSERT(target_end <= MAX_ADDRESS);
+ ASSERT(size > 0);
+
+ VMAIter begin_vma = StripIterConstness(FindVMA(target));
+ VMAIter i_end = vma_map.lower_bound(target_end);
+ for (auto i = begin_vma; i != i_end; ++i) {
+ if (i->second.type == VMAType::Free) {
+ return ERR_INVALID_ADDRESS_STATE;
+ }
+ }
+
+ if (target != begin_vma->second.base) {
+ begin_vma = SplitVMA(begin_vma, target - begin_vma->second.base);
+ }
+
+ VMAIter end_vma = StripIterConstness(FindVMA(target_end));
+ if (end_vma != vma_map.end() && target_end != end_vma->second.base) {
+ end_vma = SplitVMA(end_vma, target_end - end_vma->second.base);
+ }
+
+ return MakeResult<VMAIter>(begin_vma);
+}
+
VMManager::VMAIter VMManager::SplitVMA(VMAIter vma_handle, u32 offset_in_vma) {
VirtualMemoryArea& old_vma = vma_handle->second;
VirtualMemoryArea new_vma = old_vma; // Make a copy of the VMA
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index b3795a94a..4e95f1f0c 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -14,6 +14,14 @@
namespace Kernel {
+const ResultCode ERR_INVALID_ADDRESS{ // 0xE0E01BF5
+ ErrorDescription::InvalidAddress, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+
+const ResultCode ERR_INVALID_ADDRESS_STATE{ // 0xE0A01BF5
+ ErrorDescription::InvalidAddress, ErrorModule::OS,
+ ErrorSummary::InvalidState, ErrorLevel::Usage};
+
enum class VMAType : u8 {
/// VMA represents an unmapped region of the address space.
Free,
@@ -75,7 +83,7 @@ struct VirtualMemoryArea {
/// Memory block backing this VMA.
std::shared_ptr<std::vector<u8>> backing_block = nullptr;
/// Offset into the backing_memory the mapping starts from.
- u32 offset = 0;
+ size_t offset = 0;
// Settings for type = BackingMemory
/// Pointer backing this VMA. It will not be destroyed or freed when the VMA is removed.
@@ -141,7 +149,7 @@ public:
* @param state MemoryState tag to attach to the VMA.
*/
ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
- u32 offset, u32 size, MemoryState state);
+ size_t offset, u32 size, MemoryState state);
/**
* Maps an unmanaged host memory pointer at a given address.
@@ -163,14 +171,23 @@ public:
*/
ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u32 size, MemoryState state);
- /// Unmaps the given VMA.
- void Unmap(VMAHandle vma);
+ /// Unmaps a range of addresses, splitting VMAs as necessary.
+ ResultCode UnmapRange(VAddr target, u32 size);
/// Changes the permissions of the given VMA.
- void Reprotect(VMAHandle vma, VMAPermission new_perms);
+ VMAHandle Reprotect(VMAHandle vma, VMAPermission new_perms);
+
+ /// Changes the permissions of a range of addresses, splitting VMAs as necessary.
+ ResultCode ReprotectRange(VAddr target, u32 size, VMAPermission new_perms);
+
+ /**
+ * Scans all VMAs and updates the page table range of any that use the given vector as backing
+ * memory. This should be called after any operation that causes reallocation of the vector.
+ */
+ void RefreshMemoryBlockMappings(const std::vector<u8>* block);
/// Dumps the address space layout to the log, for debugging
- void LogLayout() const;
+ void LogLayout(Log::Level log_level) const;
private:
using VMAIter = decltype(vma_map)::iterator;
@@ -178,6 +195,9 @@ private:
/// Converts a VMAHandle to a mutable VMAIter.
VMAIter StripIterConstness(const VMAHandle& iter);
+ /// Unmaps the given VMA.
+ VMAIter Unmap(VMAIter vma);
+
/**
* Carves a VMA of a specific size at the specified address by splitting Free VMAs while doing
* the appropriate error checking.
@@ -185,6 +205,12 @@ private:
ResultVal<VMAIter> CarveVMA(VAddr base, u32 size);
/**
+ * Splits the edges of the given range of non-Free VMAs so that there is a VMA split at each
+ * end of the range.
+ */
+ ResultVal<VMAIter> CarveVMARange(VAddr base, u32 size);
+
+ /**
* Splits a VMA in two, at the specified offset.
* @returns the right side of the split, with the original iterator becoming the left side.
*/
diff --git a/src/core/hle/service/apt/apt.cpp b/src/core/hle/service/apt/apt.cpp
index 35402341b..ba66569b4 100644
--- a/src/core/hle/service/apt/apt.cpp
+++ b/src/core/hle/service/apt/apt.cpp
@@ -16,6 +16,7 @@
#include "core/hle/hle.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/mutex.h"
+#include "core/hle/kernel/process.h"
#include "core/hle/kernel/shared_memory.h"
#include "core/hle/kernel/thread.h"
@@ -37,7 +38,7 @@ static Kernel::SharedPtr<Kernel::Mutex> lock;
static Kernel::SharedPtr<Kernel::Event> notification_event; ///< APT notification event
static Kernel::SharedPtr<Kernel::Event> parameter_event; ///< APT parameter event
-static std::vector<u8> shared_font;
+static std::shared_ptr<std::vector<u8>> shared_font;
static u32 cpu_percent; ///< CPU time available to the running application
@@ -74,11 +75,12 @@ void Initialize(Service::Interface* self) {
void GetSharedFont(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- if (!shared_font.empty()) {
- // TODO(bunnei): This function shouldn't copy the shared font every time it's called.
- // Instead, it should probably map the shared font as RO memory. We don't currently have
- // an easy way to do this, but the copy should be sufficient for now.
- memcpy(Memory::GetPointer(SHARED_FONT_VADDR), shared_font.data(), shared_font.size());
+ if (shared_font != nullptr) {
+ // TODO(yuriks): This is a hack to keep this working right now even with our completely
+ // broken shared memory system.
+ shared_font_mem->fixed_address = SHARED_FONT_VADDR;
+ Kernel::g_current_process->vm_manager.MapMemoryBlock(shared_font_mem->fixed_address,
+ shared_font, 0, shared_font_mem->size, Kernel::MemoryState::Shared);
cmd_buff[0] = IPC::MakeHeader(0x44, 2, 2);
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
@@ -391,7 +393,6 @@ void Init() {
// a homebrew app to do this: https://github.com/citra-emu/3dsutils. Put the resulting file
// "shared_font.bin" in the Citra "sysdata" directory.
- shared_font.clear();
std::string filepath = FileUtil::GetUserPath(D_SYSDATA_IDX) + SHARED_FONT;
FileUtil::CreateFullPath(filepath); // Create path if not already created
@@ -399,8 +400,8 @@ void Init() {
if (file.IsOpen()) {
// Read shared font data
- shared_font.resize((size_t)file.GetSize());
- file.ReadBytes(shared_font.data(), (size_t)file.GetSize());
+ shared_font = std::make_shared<std::vector<u8>>((size_t)file.GetSize());
+ file.ReadBytes(shared_font->data(), shared_font->size());
// Create shared font memory object
using Kernel::MemoryPermission;
@@ -424,7 +425,7 @@ void Init() {
}
void Shutdown() {
- shared_font.clear();
+ shared_font = nullptr;
shared_font_mem = nullptr;
lock = nullptr;
notification_event = nullptr;
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index e93c1b436..c3d0d28a5 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -3,8 +3,8 @@
// Refer to the license.txt file included.
#include "common/bit_field.h"
+#include "common/microprofile.h"
-#include "core/mem_map.h"
#include "core/memory.h"
#include "core/hle/kernel/event.h"
#include "core/hle/kernel/shared_memory.h"
@@ -230,6 +230,10 @@ void SetBufferSwap(u32 screen_id, const FrameBufferInfo& info) {
if (Pica::g_debug_context)
Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::BufferSwapped, nullptr);
+
+ if (screen_id == 0) {
+ MicroProfileFlip();
+ }
}
/**
@@ -418,7 +422,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
case CommandId::SET_DISPLAY_TRANSFER:
{
- auto& params = command.image_copy;
+ auto& params = command.display_transfer;
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
@@ -433,17 +437,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
// TODO: Check if texture copies are implemented correctly..
case CommandId::SET_TEXTURE_COPY:
{
- auto& params = command.image_copy;
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
+ auto& params = command.texture_copy;
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size);
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags);
-
- // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1?
- WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
+ params.size);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
+ params.in_width_gap);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
+ params.out_width_gap);
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags),
+ params.flags);
+
+ // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
+ WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
break;
}
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index c89d0a467..8bcb30ad1 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -127,7 +127,16 @@ struct Command {
u32 in_buffer_size;
u32 out_buffer_size;
u32 flags;
- } image_copy;
+ } display_transfer;
+
+ struct {
+ u32 in_buffer_address;
+ u32 out_buffer_address;
+ u32 size;
+ u32 in_width_gap;
+ u32 out_width_gap;
+ u32 flags;
+ } texture_copy;
u8 raw_data[0x1C];
};
diff --git a/src/core/hle/service/y2r_u.cpp b/src/core/hle/service/y2r_u.cpp
index 6e7dafaad..6b1b71fe4 100644
--- a/src/core/hle/service/y2r_u.cpp
+++ b/src/core/hle/service/y2r_u.cpp
@@ -10,7 +10,6 @@
#include "core/hle/kernel/event.h"
#include "core/hle/service/y2r_u.h"
#include "core/hw/y2r.h"
-#include "core/mem_map.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
diff --git a/src/core/hle/shared_page.cpp b/src/core/hle/shared_page.cpp
index 26d87c7e2..50c5bc01b 100644
--- a/src/core/hle/shared_page.cpp
+++ b/src/core/hle/shared_page.cpp
@@ -18,7 +18,4 @@ void Init() {
shared_page.running_hw = 0x1; // product
}
-void Shutdown() {
-}
-
} // namespace
diff --git a/src/core/hle/shared_page.h b/src/core/hle/shared_page.h
index db6a5340b..379bb7b63 100644
--- a/src/core/hle/shared_page.h
+++ b/src/core/hle/shared_page.h
@@ -54,6 +54,5 @@ static_assert(sizeof(SharedPageDef) == Memory::SHARED_PAGE_SIZE, "Shared page st
extern SharedPageDef shared_page;
void Init();
-void Shutdown();
} // namespace
diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp
index bb64fdfb7..19f750d72 100644
--- a/src/core/hle/svc.cpp
+++ b/src/core/hle/svc.cpp
@@ -5,16 +5,17 @@
#include <map>
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "common/string_util.h"
#include "common/symbols.h"
#include "core/core_timing.h"
-#include "core/mem_map.h"
#include "core/arm/arm_interface.h"
#include "core/hle/kernel/address_arbiter.h"
#include "core/hle/kernel/event.h"
+#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/mutex.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/resource_limit.h"
@@ -41,32 +42,114 @@ const ResultCode ERR_NOT_FOUND(ErrorDescription::NotFound, ErrorModule::Kernel,
const ResultCode ERR_PORT_NAME_TOO_LONG(ErrorDescription(30), ErrorModule::OS,
ErrorSummary::InvalidArgument, ErrorLevel::Usage); // 0xE0E0181E
+const ResultCode ERR_MISALIGNED_ADDRESS{ // 0xE0E01BF1
+ ErrorDescription::MisalignedAddress, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+const ResultCode ERR_MISALIGNED_SIZE{ // 0xE0E01BF2
+ ErrorDescription::MisalignedSize, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+const ResultCode ERR_INVALID_COMBINATION{ // 0xE0E01BEE
+ ErrorDescription::InvalidCombination, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage};
+
enum ControlMemoryOperation {
- MEMORY_OPERATION_HEAP = 0x00000003,
- MEMORY_OPERATION_GSP_HEAP = 0x00010003,
+ MEMOP_FREE = 1,
+ MEMOP_RESERVE = 2, // This operation seems to be unsupported in the kernel
+ MEMOP_COMMIT = 3,
+ MEMOP_MAP = 4,
+ MEMOP_UNMAP = 5,
+ MEMOP_PROTECT = 6,
+ MEMOP_OPERATION_MASK = 0xFF,
+
+ MEMOP_REGION_APP = 0x100,
+ MEMOP_REGION_SYSTEM = 0x200,
+ MEMOP_REGION_BASE = 0x300,
+ MEMOP_REGION_MASK = 0xF00,
+
+ MEMOP_LINEAR = 0x10000,
};
/// Map application or GSP heap memory
static ResultCode ControlMemory(u32* out_addr, u32 operation, u32 addr0, u32 addr1, u32 size, u32 permissions) {
- LOG_TRACE(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=%08X, permissions=0x%08X",
+ using namespace Kernel;
+
+ LOG_DEBUG(Kernel_SVC,"called operation=0x%08X, addr0=0x%08X, addr1=0x%08X, size=0x%X, permissions=0x%08X",
operation, addr0, addr1, size, permissions);
- switch (operation) {
+ if ((addr0 & Memory::PAGE_MASK) != 0 || (addr1 & Memory::PAGE_MASK) != 0) {
+ return ERR_MISALIGNED_ADDRESS;
+ }
+ if ((size & Memory::PAGE_MASK) != 0) {
+ return ERR_MISALIGNED_SIZE;
+ }
+
+ u32 region = operation & MEMOP_REGION_MASK;
+ operation &= ~MEMOP_REGION_MASK;
+
+ if (region != 0) {
+ LOG_WARNING(Kernel_SVC, "ControlMemory with specified region not supported, region=%X", region);
+ }
- // Map normal heap memory
- case MEMORY_OPERATION_HEAP:
- *out_addr = Memory::MapBlock_Heap(size, operation, permissions);
+ if ((permissions & (u32)MemoryPermission::ReadWrite) != permissions) {
+ return ERR_INVALID_COMBINATION;
+ }
+ VMAPermission vma_permissions = (VMAPermission)permissions;
+
+ auto& process = *g_current_process;
+
+ switch (operation & MEMOP_OPERATION_MASK) {
+ case MEMOP_FREE:
+ {
+ if (addr0 >= Memory::HEAP_VADDR && addr0 < Memory::HEAP_VADDR_END) {
+ ResultCode result = process.HeapFree(addr0, size);
+ if (result.IsError()) return result;
+ } else if (addr0 >= process.GetLinearHeapBase() && addr0 < process.GetLinearHeapLimit()) {
+ ResultCode result = process.LinearFree(addr0, size);
+ if (result.IsError()) return result;
+ } else {
+ return ERR_INVALID_ADDRESS;
+ }
+ *out_addr = addr0;
break;
+ }
- // Map GSP heap memory
- case MEMORY_OPERATION_GSP_HEAP:
- *out_addr = Memory::MapBlock_HeapLinear(size, operation, permissions);
+ case MEMOP_COMMIT:
+ {
+ if (operation & MEMOP_LINEAR) {
+ CASCADE_RESULT(*out_addr, process.LinearAllocate(addr0, size, vma_permissions));
+ } else {
+ CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));
+ }
+ break;
+ }
+
+ case MEMOP_MAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented
+ {
+ CASCADE_RESULT(*out_addr, process.HeapAllocate(addr0, size, vma_permissions));
break;
+ }
+
+ case MEMOP_UNMAP: // TODO: This is just a hack to avoid regressions until memory aliasing is implemented
+ {
+ ResultCode result = process.HeapFree(addr0, size);
+ if (result.IsError()) return result;
+ break;
+ }
+
+ case MEMOP_PROTECT:
+ {
+ ResultCode result = process.vm_manager.ReprotectRange(addr0, size, vma_permissions);
+ if (result.IsError()) return result;
+ break;
+ }
- // Unknown ControlMemory operation
default:
LOG_ERROR(Kernel_SVC, "unknown operation=0x%08X", operation);
+ return ERR_INVALID_COMBINATION;
}
+
+ process.vm_manager.LogLayout(Log::Level::Trace);
+
return RESULT_SUCCESS;
}
@@ -537,9 +620,9 @@ static ResultCode QueryProcessMemory(MemoryInfo* memory_info, PageInfo* page_inf
if (process == nullptr)
return ERR_INVALID_HANDLE;
- auto vma = process->address_space->FindVMA(addr);
+ auto vma = process->vm_manager.FindVMA(addr);
- if (vma == process->address_space->vma_map.end())
+ if (vma == Kernel::g_current_process->vm_manager.vma_map.end())
return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::OS, ErrorSummary::InvalidArgument, ErrorLevel::Usage);
memory_info->base_address = vma->second.base;
@@ -692,6 +775,52 @@ static ResultCode CreateMemoryBlock(Handle* out_handle, u32 addr, u32 size, u32
return RESULT_SUCCESS;
}
+static ResultCode GetProcessInfo(s64* out, Handle process_handle, u32 type) {
+ LOG_TRACE(Kernel_SVC, "called process=0x%08X type=%u", process_handle, type);
+
+ using Kernel::Process;
+ Kernel::SharedPtr<Process> process = Kernel::g_handle_table.Get<Process>(process_handle);
+ if (process == nullptr)
+ return ERR_INVALID_HANDLE;
+
+ switch (type) {
+ case 0:
+ case 2:
+ // TODO(yuriks): Type 0 returns a slightly higher number than type 2, but I'm not sure
+ // what's the difference between them.
+ *out = process->heap_used + process->linear_heap_used + process->misc_memory_used;
+ break;
+ case 1:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ // These are valid, but not implemented yet
+ LOG_ERROR(Kernel_SVC, "unimplemented GetProcessInfo type=%u", type);
+ break;
+ case 20:
+ *out = Memory::FCRAM_PADDR - process->GetLinearHeapBase();
+ break;
+ default:
+ LOG_ERROR(Kernel_SVC, "unknown GetProcessInfo type=%u", type);
+
+ if (type >= 21 && type <= 23) {
+ return ResultCode( // 0xE0E01BF4
+ ErrorDescription::NotImplemented, ErrorModule::OS,
+ ErrorSummary::InvalidArgument, ErrorLevel::Usage);
+ } else {
+ return ResultCode( // 0xD8E007ED
+ ErrorDescription::InvalidEnumValue, ErrorModule::Kernel,
+ ErrorSummary::InvalidArgument, ErrorLevel::Permanent);
+ }
+ break;
+ }
+
+ return RESULT_SUCCESS;
+}
+
namespace {
struct FunctionDef {
using Func = void();
@@ -746,7 +875,7 @@ static const FunctionDef SVC_Table[] = {
{0x28, HLE::Wrap<GetSystemTick>, "GetSystemTick"},
{0x29, nullptr, "GetHandleInfo"},
{0x2A, nullptr, "GetSystemInfo"},
- {0x2B, nullptr, "GetProcessInfo"},
+ {0x2B, HLE::Wrap<GetProcessInfo>, "GetProcessInfo"},
{0x2C, nullptr, "GetThreadInfo"},
{0x2D, HLE::Wrap<ConnectToPort>, "ConnectToPort"},
{0x2E, nullptr, "SendSyncRequest1"},
@@ -841,8 +970,11 @@ static const FunctionDef* GetSVCInfo(u32 func_num) {
return &SVC_Table[func_num];
}
+MICROPROFILE_DEFINE(Kernel_SVC, "Kernel", "SVC", MP_RGB(70, 200, 70));
+
void CallSVC(u32 immediate) {
Common::Profiling::ScopeTimer timer_svc(profiler_svc);
+ MICROPROFILE_SCOPE(Kernel_SVC);
const FunctionDef* info = GetSVCInfo(immediate);
if (info) {
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 3ccbc03b2..bc7bde903 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -3,11 +3,13 @@
// Refer to the license.txt file included.
#include <cstring>
+#include <numeric>
#include <type_traits>
#include "common/color.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/settings.h"
@@ -84,6 +86,9 @@ static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_
}
}
+MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255));
+MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100));
+
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
@@ -149,6 +154,8 @@ inline void Write(u32 addr, const T data) {
case GPU_REG_INDEX(display_transfer_config.trigger):
{
+ MICROPROFILE_SCOPE(GPU_DisplayTransfer);
+
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
@@ -158,14 +165,59 @@ inline void Write(u32 addr, const T data) {
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
+ if (config.is_texture_copy) {
+ u32 input_width = config.texture_copy.input_width * 16;
+ u32 input_gap = config.texture_copy.input_gap * 16;
+ u32 output_width = config.texture_copy.output_width * 16;
+ u32 output_gap = config.texture_copy.output_gap * 16;
+
+ size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
+ VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size);
+
+ u32 remaining_size = config.texture_copy.size;
+ u32 remaining_input = input_width;
+ u32 remaining_output = output_width;
+ while (remaining_size > 0) {
+ u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
+
+ std::memcpy(dst_pointer, src_pointer, copy_size);
+ src_pointer += copy_size;
+ dst_pointer += copy_size;
+
+ remaining_input -= copy_size;
+ remaining_output -= copy_size;
+ remaining_size -= copy_size;
+
+ if (remaining_input == 0) {
+ remaining_input = input_width;
+ src_pointer += input_gap;
+ }
+ if (remaining_output == 0) {
+ remaining_output = output_width;
+ dst_pointer += output_gap;
+ }
+ }
+
+ LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
+ config.texture_copy.size,
+ config.GetPhysicalInputAddress(), input_width, input_gap,
+ config.GetPhysicalOutputAddress(), output_width, output_gap,
+ config.flags);
+
+ size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
+ VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size);
+
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
+ break;
+ }
+
if (config.scaling > config.ScaleXY) {
LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
UNIMPLEMENTED();
break;
}
- if (config.output_tiled &&
- (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
+ if (config.input_linear && config.scaling != config.NoScale) {
LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
UNIMPLEMENTED();
break;
@@ -182,23 +234,6 @@ inline void Write(u32 addr, const T data) {
VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
- if (config.raw_copy) {
- // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
- // TODO(Subv): Verify if raw copies perform scaling
- memcpy(dst_pointer, src_pointer, output_size);
-
- LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy",
- output_size,
- config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
- config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
- config.output_format.Value(), config.flags);
-
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
-
- VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
- break;
- }
-
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
Math::Vec4<u8> src_color;
@@ -220,7 +255,7 @@ inline void Write(u32 addr, const T data) {
u32 src_offset;
u32 dst_offset;
- if (config.output_tiled) {
+ if (config.input_linear) {
if (!config.dont_swizzle) {
// Interpret the input as linear and the output as tiled
u32 coarse_y = y & ~7;
@@ -315,6 +350,8 @@ inline void Write(u32 addr, const T data) {
const auto& config = g_regs.command_processor_config;
if (config.trigger & 1)
{
+ MICROPROFILE_SCOPE(GPU_CmdlistProcessing);
+
u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index daad506fe..2e3a9f779 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -201,12 +201,14 @@ struct Regs {
u32 flags;
BitField< 0, 1, u32> flip_vertically; // flips input data vertically
- BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format
- BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing
+ BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format
+ BitField< 2, 1, u32> crop_input_lines;
+ BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields
BitField< 5, 1, u32> dont_swizzle;
BitField< 8, 3, PixelFormat> input_format;
BitField<12, 3, PixelFormat> output_format;
-
+ /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
+ BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
};
@@ -214,10 +216,30 @@ struct Regs {
// it seems that writing to this field triggers the display transfer
u32 trigger;
+
+ INSERT_PADDING_WORDS(0x1);
+
+ struct {
+ u32 size;
+
+ union {
+ u32 input_size;
+
+ BitField< 0, 16, u32> input_width;
+ BitField<16, 16, u32> input_gap;
+ };
+
+ union {
+ u32 output_size;
+
+ BitField< 0, 16, u32> output_width;
+ BitField<16, 16, u32> output_gap;
+ };
+ } texture_copy;
} display_transfer_config;
- ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
+ ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c);
- INSERT_PADDING_WORDS(0x331);
+ INSERT_PADDING_WORDS(0x32D);
struct {
// command list size (in bytes)
diff --git a/src/core/mem_map.cpp b/src/core/mem_map.cpp
deleted file mode 100644
index cbe993fbe..000000000
--- a/src/core/mem_map.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <map>
-#include <memory>
-#include <utility>
-#include <vector>
-
-#include "common/common_types.h"
-#include "common/logging/log.h"
-
-#include "core/hle/config_mem.h"
-#include "core/hle/kernel/vm_manager.h"
-#include "core/hle/result.h"
-#include "core/hle/shared_page.h"
-#include "core/mem_map.h"
-#include "core/memory.h"
-#include "core/memory_setup.h"
-
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
-namespace Memory {
-
-namespace {
-
-struct MemoryArea {
- u32 base;
- u32 size;
- const char* name;
-};
-
-// We don't declare the IO regions in here since its handled by other means.
-static MemoryArea memory_areas[] = {
- {HEAP_VADDR, HEAP_SIZE, "Heap"}, // Application heap (main memory)
- {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory
- {LINEAR_HEAP_VADDR, LINEAR_HEAP_SIZE, "Linear Heap"}, // Linear heap (main memory)
- {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
- {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory
- {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory
-};
-
-/// Represents a block of memory mapped by ControlMemory/MapMemoryBlock
-struct MemoryBlock {
- MemoryBlock() : handle(0), base_address(0), address(0), size(0), operation(0), permissions(0) {
- }
- u32 handle;
- u32 base_address;
- u32 address;
- u32 size;
- u32 operation;
- u32 permissions;
-
- const u32 GetVirtualAddress() const{
- return base_address + address;
- }
-};
-
-static std::map<u32, MemoryBlock> heap_map;
-static std::map<u32, MemoryBlock> heap_linear_map;
-
-}
-
-u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions) {
- MemoryBlock block;
-
- block.base_address = HEAP_VADDR;
- block.size = size;
- block.operation = operation;
- block.permissions = permissions;
-
- if (heap_map.size() > 0) {
- const MemoryBlock last_block = heap_map.rbegin()->second;
- block.address = last_block.address + last_block.size;
- }
- heap_map[block.GetVirtualAddress()] = block;
-
- return block.GetVirtualAddress();
-}
-
-u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions) {
- MemoryBlock block;
-
- block.base_address = LINEAR_HEAP_VADDR;
- block.size = size;
- block.operation = operation;
- block.permissions = permissions;
-
- if (heap_linear_map.size() > 0) {
- const MemoryBlock last_block = heap_linear_map.rbegin()->second;
- block.address = last_block.address + last_block.size;
- }
- heap_linear_map[block.GetVirtualAddress()] = block;
-
- return block.GetVirtualAddress();
-}
-
-PAddr VirtualToPhysicalAddress(const VAddr addr) {
- if (addr == 0) {
- return 0;
- } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
- return addr - VRAM_VADDR + VRAM_PADDR;
- } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
- return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
- } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
- return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
- } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
- return addr - IO_AREA_VADDR + IO_AREA_PADDR;
- }
-
- LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08x", addr);
- // To help with debugging, set bit on address so that it's obviously invalid.
- return addr | 0x80000000;
-}
-
-VAddr PhysicalToVirtualAddress(const PAddr addr) {
- if (addr == 0) {
- return 0;
- } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) {
- return addr - VRAM_PADDR + VRAM_VADDR;
- } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) {
- return addr - FCRAM_PADDR + LINEAR_HEAP_VADDR;
- } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) {
- return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
- } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
- return addr - IO_AREA_PADDR + IO_AREA_VADDR;
- }
-
- LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08x", addr);
- // To help with debugging, set bit on address so that it's obviously invalid.
- return addr | 0x80000000;
-}
-
-void Init() {
- InitMemoryMap();
- LOG_DEBUG(HW_Memory, "initialized OK");
-}
-
-void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
- using namespace Kernel;
-
- for (MemoryArea& area : memory_areas) {
- auto block = std::make_shared<std::vector<u8>>(area.size);
- address_space.MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private).Unwrap();
- }
-
- auto cfg_mem_vma = address_space.MapBackingMemory(CONFIG_MEMORY_VADDR,
- (u8*)&ConfigMem::config_mem, CONFIG_MEMORY_SIZE, MemoryState::Shared).MoveFrom();
- address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
-
- auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR,
- (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom();
- address_space.Reprotect(shared_page_vma, VMAPermission::Read);
-}
-
-void Shutdown() {
- heap_map.clear();
- heap_linear_map.clear();
-
- LOG_DEBUG(HW_Memory, "shutdown OK");
-}
-
-} // namespace
diff --git a/src/core/mem_map.h b/src/core/mem_map.h
deleted file mode 100644
index 229ef82c5..000000000
--- a/src/core/mem_map.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "common/common_types.h"
-
-namespace Kernel {
-class VMManager;
-}
-
-namespace Memory {
-
-void Init();
-void InitLegacyAddressSpace(Kernel::VMManager& address_space);
-void Shutdown();
-
-/**
- * Maps a block of memory on the heap
- * @param size Size of block in bytes
- * @param operation Memory map operation type
- * @param permissions Memory allocation permissions
- */
-u32 MapBlock_Heap(u32 size, u32 operation, u32 permissions);
-
-/**
- * Maps a block of memory on the GSP heap
- * @param size Size of block in bytes
- * @param operation Memory map operation type
- * @param permissions Control memory permissions
- */
-u32 MapBlock_HeapLinear(u32 size, u32 operation, u32 permissions);
-
-/**
- * Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical
- * address. This should be used by services to translate addresses for use by the hardware.
- */
-PAddr VirtualToPhysicalAddress(VAddr addr);
-
-/**
- * Undoes a mapping performed by VirtualToPhysicalAddress().
- */
-VAddr PhysicalToVirtualAddress(PAddr addr);
-
-} // namespace
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 1f66bb27d..cde390b8a 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -9,7 +9,7 @@
#include "common/logging/log.h"
#include "common/swap.h"
-#include "core/mem_map.h"
+#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "core/memory_setup.h"
@@ -198,4 +198,42 @@ void WriteBlock(const VAddr addr, const u8* data, const size_t size) {
Write8(addr + offset, data[offset]);
}
+PAddr VirtualToPhysicalAddress(const VAddr addr) {
+ if (addr == 0) {
+ return 0;
+ } else if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
+ return addr - VRAM_VADDR + VRAM_PADDR;
+ } else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
+ return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
+ } else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
+ return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
+ } else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
+ return addr - IO_AREA_VADDR + IO_AREA_PADDR;
+ } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
+ return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR;
+ }
+
+ LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr);
+ // To help with debugging, set bit on address so that it's obviously invalid.
+ return addr | 0x80000000;
+}
+
+VAddr PhysicalToVirtualAddress(const PAddr addr) {
+ if (addr == 0) {
+ return 0;
+ } else if (addr >= VRAM_PADDR && addr < VRAM_PADDR_END) {
+ return addr - VRAM_PADDR + VRAM_VADDR;
+ } else if (addr >= FCRAM_PADDR && addr < FCRAM_PADDR_END) {
+ return addr - FCRAM_PADDR + Kernel::g_current_process->GetLinearHeapBase();
+ } else if (addr >= DSP_RAM_PADDR && addr < DSP_RAM_PADDR_END) {
+ return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
+ } else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
+ return addr - IO_AREA_PADDR + IO_AREA_VADDR;
+ }
+
+ LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr);
+ // To help with debugging, set bit on address so that it's obviously invalid.
+ return addr | 0x80000000;
+}
+
} // namespace
diff --git a/src/core/memory.h b/src/core/memory.h
index 418609de0..5af72b7a7 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -15,6 +15,8 @@ namespace Memory {
* be mapped.
*/
const u32 PAGE_SIZE = 0x1000;
+const u32 PAGE_MASK = PAGE_SIZE - 1;
+const int PAGE_BITS = 12;
/// Physical memory regions as seen from the ARM11
enum : PAddr {
@@ -103,8 +105,15 @@ enum : VAddr {
// hardcoded value.
/// Area where TLS (Thread-Local Storage) buffers are allocated.
TLS_AREA_VADDR = 0x1FF82000,
- TLS_AREA_SIZE = 0x00030000, // Each TLS buffer is 0x200 bytes, allows for 300 threads
+ TLS_ENTRY_SIZE = 0x200,
+ TLS_AREA_SIZE = 300 * TLS_ENTRY_SIZE + 0x800, // Space for up to 300 threads + round to page size
TLS_AREA_VADDR_END = TLS_AREA_VADDR + TLS_AREA_SIZE,
+
+
+ /// Equivalent to LINEAR_HEAP_VADDR, but expanded to cover the extra memory in the New 3DS.
+ NEW_LINEAR_HEAP_VADDR = 0x30000000,
+ NEW_LINEAR_HEAP_SIZE = 0x10000000,
+ NEW_LINEAR_HEAP_VADDR_END = NEW_LINEAR_HEAP_VADDR + NEW_LINEAR_HEAP_SIZE,
};
u8 Read8(VAddr addr);
@@ -122,6 +131,17 @@ void WriteBlock(VAddr addr, const u8* data, size_t size);
u8* GetPointer(VAddr virtual_address);
/**
+* Converts a virtual address inside a region with 1:1 mapping to physical memory to a physical
+* address. This should be used by services to translate addresses for use by the hardware.
+*/
+PAddr VirtualToPhysicalAddress(VAddr addr);
+
+/**
+* Undoes a mapping performed by VirtualToPhysicalAddress().
+*/
+VAddr PhysicalToVirtualAddress(PAddr addr);
+
+/**
* Gets a pointer to the memory region beginning at the specified physical address.
*
* @note This is currently implemented using PhysicalToVirtualAddress().
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 361bfc816..84ff30120 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -10,9 +10,6 @@
namespace Memory {
-const u32 PAGE_MASK = PAGE_SIZE - 1;
-const int PAGE_BITS = 12;
-
void InitMemoryMap();
/**
diff --git a/src/core/system.cpp b/src/core/system.cpp
index 561ff82f0..3cd84bf5e 100644
--- a/src/core/system.cpp
+++ b/src/core/system.cpp
@@ -4,11 +4,11 @@
#include "core/core.h"
#include "core/core_timing.h"
-#include "core/mem_map.h"
#include "core/system.h"
#include "core/hw/hw.h"
#include "core/hle/hle.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/memory.h"
#include "video_core/video_core.h"
@@ -29,7 +29,6 @@ void Shutdown() {
HLE::Shutdown();
Kernel::Shutdown();
HW::Shutdown();
- Memory::Shutdown();
CoreTiming::Shutdown();
Core::Shutdown();
}
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d82e20f86..a78985510 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -4,6 +4,7 @@
#include <boost/range/algorithm/fill.hpp>
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hle/service/gsp_gpu.h"
@@ -43,6 +44,8 @@ static const u32 expand_bits_to_bytes[] = {
0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff
};
+MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
+
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
@@ -126,6 +129,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw_indexed):
{
Common::Profiling::ScopeTimer scope_timer(category_drawing);
+ MICROPROFILE_SCOPE(GPU_Drawing);
#if PICA_LOG_TEV
DebugUtils::DumpTevStageConfig(regs.GetTevStages());
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 8ad77f0c8..059445f7d 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -25,6 +25,8 @@
#include "common/math_util.h"
#include "common/vector_math.h"
+#include "core/settings.h"
+
#include "video_core/pica.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
@@ -45,8 +47,10 @@ void DebugContext::OnEvent(Event event, void* data) {
{
std::unique_lock<std::mutex> lock(breakpoint_mutex);
- // Commit the hardware renderer's framebuffer so it will show on debug widgets
- VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer();
+ if (Settings::values.use_hw_renderer) {
+ // Commit the hardware renderer's framebuffer so it will show on debug widgets
+ VideoCore::g_renderer->hw_rasterizer->CommitFramebuffer();
+ }
// TODO: Should stop the CPU thread here once we multithread emulation.
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 87cf705e7..f40684d83 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -1033,12 +1033,20 @@ struct float24 {
return ret;
}
+ static float24 Zero() {
+ return FromFloat32(0.f);
+ }
+
// Not recommended for anything but logging
float ToFloat32() const {
return value;
}
float24 operator * (const float24& flt) const {
+ if ((this->value == 0.f && !std::isnan(flt.value)) ||
+ (flt.value == 0.f && !std::isnan(this->value)))
+ // PICA gives 0 instead of NaN when multiplying by inf
+ return Zero();
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
}
@@ -1055,7 +1063,11 @@ struct float24 {
}
float24& operator *= (const float24& flt) {
- value *= flt.ToFloat32();
+ if ((this->value == 0.f && !std::isnan(flt.value)) ||
+ (flt.value == 0.f && !std::isnan(this->value)))
+ // PICA gives 0 instead of NaN when multiplying by inf
+ *this = Zero();
+ else value *= flt.ToFloat32();
return *this;
}
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index b6c0e1bff..77eadda9e 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/common_types.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hw/gpu.h"
@@ -286,6 +287,7 @@ static int SignedArea (const Math::Vec2<Fix12P4>& vtx1,
};
static Common::Profiling::TimingCategory rasterization_category("Rasterization");
+MICROPROFILE_DEFINE(GPU_Rasterization, "GPU", "Rasterization", MP_RGB(50, 50, 240));
/**
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
@@ -298,6 +300,7 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
{
const auto& regs = g_state.regs;
Common::Profiling::ScopeTimer timer(rasterization_category);
+ MICROPROFILE_SCOPE(GPU_Rasterization);
// vertex positions in rasterizer coordinates
static auto FloatToFix = [](float24 flt) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c3829d5c6..d29049508 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "core/hw/gpu.h"
@@ -230,8 +231,8 @@ void RasterizerOpenGL::DrawTriangles() {
u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format)
* regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight();
- res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size);
- res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size);
+ res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size, true);
+ res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size, true);
}
void RasterizerOpenGL::CommitFramebuffer() {
@@ -786,12 +787,16 @@ void RasterizerOpenGL::SyncDrawState() {
state.Apply();
}
+MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200));
+
void RasterizerOpenGL::ReloadColorBuffer() {
u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress());
if (color_buffer == nullptr)
return;
+ MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
std::unique_ptr<u8[]> temp_fb_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
@@ -831,6 +836,8 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
if (depth_buffer == nullptr)
return;
+ MICROPROFILE_SCOPE(OpenGL_FramebufferReload);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
// OpenGL needs 4 bpp alignment for D24
@@ -884,6 +891,7 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
}
Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
+MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200));
void RasterizerOpenGL::CommitColorBuffer() {
if (last_fb_color_addr != 0) {
@@ -891,6 +899,7 @@ void RasterizerOpenGL::CommitColorBuffer() {
if (color_buffer != nullptr) {
Common::Profiling::ScopeTimer timer(buffer_commit_category);
+ MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
@@ -927,6 +936,7 @@ void RasterizerOpenGL::CommitDepthBuffer() {
if (depth_buffer != nullptr) {
Common::Profiling::ScopeTimer timer(buffer_commit_category);
+ MICROPROFILE_SCOPE(OpenGL_FramebufferCommit);
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 70f0ba5f1..1e38c2e6d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include "common/hash.h"
#include "common/make_unique.h"
#include "common/math_util.h"
+#include "common/microprofile.h"
#include "common/vector_math.h"
#include "core/memory.h"
@@ -16,15 +18,18 @@ RasterizerCacheOpenGL::~RasterizerCacheOpenGL() {
FullFlush();
}
+MICROPROFILE_DEFINE(OpenGL_TextureUpload, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
+
void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config) {
PAddr texture_addr = config.config.GetPhysicalAddress();
-
const auto cached_texture = texture_cache.find(texture_addr);
if (cached_texture != texture_cache.end()) {
state.texture_units[texture_unit].texture_2d = cached_texture->second->texture.handle;
state.Apply();
} else {
+ MICROPROFILE_SCOPE(OpenGL_TextureUpload);
+
std::unique_ptr<CachedTexture> new_texture = Common::make_unique<CachedTexture>();
new_texture->texture.Create();
@@ -46,12 +51,14 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text
}
const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format);
+ u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr);
new_texture->width = info.width;
new_texture->height = info.height;
- new_texture->size = info.width * info.height * Pica::Regs::NibblesPerPixel(info.format);
+ new_texture->size = info.stride * info.height;
+ new_texture->addr = texture_addr;
+ new_texture->hash = Common::ComputeHash64(texture_src_data, new_texture->size);
- u8* texture_src_data = Memory::GetPhysicalPointer(texture_addr);
std::unique_ptr<Math::Vec4<u8>[]> temp_texture_buffer_rgba(new Math::Vec4<u8>[info.width * info.height]);
for (int y = 0; y < info.height; ++y) {
@@ -66,12 +73,18 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text
}
}
-void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size) {
+void RasterizerCacheOpenGL::NotifyFlush(PAddr addr, u32 size, bool ignore_hash) {
// Flush any texture that falls in the flushed region
// TODO: Optimize by also inserting upper bound (addr + size) of each texture into the same map and also narrow using lower_bound
auto cache_upper_bound = texture_cache.upper_bound(addr + size);
+
for (auto it = texture_cache.begin(); it != cache_upper_bound;) {
- if (MathUtil::IntervalsIntersect(addr, size, it->first, it->second->size)) {
+ const auto& info = *it->second;
+
+ // Flush the texture only if the memory region intersects and a change is detected
+ if (MathUtil::IntervalsIntersect(addr, size, info.addr, info.size) &&
+ (ignore_hash || info.hash != Common::ComputeHash64(Memory::GetPhysicalPointer(info.addr), info.size))) {
+
it = texture_cache.erase(it);
} else {
++it;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 96f3a925c..d8f9edf59 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -19,7 +19,7 @@ public:
void LoadAndBindTexture(OpenGLState &state, unsigned texture_unit, const Pica::Regs::FullTextureConfig& config);
/// Flush any cached resource that touches the flushed region
- void NotifyFlush(PAddr addr, u32 size);
+ void NotifyFlush(PAddr addr, u32 size, bool ignore_hash = false);
/// Flush all cached OpenGL resources tracked by this cache manager
void FullFlush();
@@ -30,6 +30,8 @@ private:
GLuint width;
GLuint height;
u32 size;
+ u64 hash;
+ PAddr addr;
};
std::map<PAddr, std::unique_ptr<CachedTexture>> texture_cache;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 4e9836c80..f89117521 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -9,6 +9,7 @@
#include "common/hash.h"
#include "common/make_unique.h"
+#include "common/microprofile.h"
#include "common/profiler.h"
#include "video_core/debug_utils/debug_utils.h"
@@ -51,15 +52,19 @@ void Setup(UnitState<false>& state) {
}
void Shutdown() {
+#ifdef ARCHITECTURE_x86_64
shader_map.clear();
+#endif // ARCHITECTURE_x86_64
}
static Common::Profiling::TimingCategory shader_category("Vertex Shader");
+MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240));
OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
Common::Profiling::ScopeTimer timer(shader_category);
+ MICROPROFILE_SCOPE(GPU_VertexShader);
state.program_counter = config.main_offset;
state.debug.max_offset = 0;
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index e14de0768..69e4efa68 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -177,7 +177,10 @@ void RunInterpreter(UnitState<Debug>& state) {
if (!swizzle.DestComponentEnabled(i))
continue;
- dest[i] = std::max(src1[i], src2[i]);
+ // NOTE: Exact form required to match NaN semantics to hardware:
+ // max(0, NaN) -> NaN
+ // max(NaN, 0) -> 0
+ dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -190,19 +193,29 @@ void RunInterpreter(UnitState<Debug>& state) {
if (!swizzle.DestComponentEnabled(i))
continue;
- dest[i] = std::min(src1[i], src2[i]);
+ // NOTE: Exact form required to match NaN semantics to hardware:
+ // min(0, NaN) -> NaN
+ // min(NaN, 0) -> 0
+ dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
case OpCode::Id::DP3:
case OpCode::Id::DP4:
+ case OpCode::Id::DPH:
+ case OpCode::Id::DPHI:
{
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+
+ OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode();
+ if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
+ src1[3] = float24::FromFloat32(1.0f);
+
float24 dot = float24::FromFloat32(0.f);
- int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
+ int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
for (int i = 0; i < num_components; ++i)
dot = dot + src1[i] * src2[i];
@@ -221,13 +234,12 @@ void RunInterpreter(UnitState<Debug>& state) {
{
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+ float24 rcp_res = float24::FromFloat32(1.0f / src1[0].ToFloat32());
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
- // TODO: Be stable against division by zero!
- // TODO: I think this might be wrong... we should only use one component here
- dest[i] = float24::FromFloat32(1.0f / src1[i].ToFloat32());
+ dest[i] = rcp_res;
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -238,13 +250,12 @@ void RunInterpreter(UnitState<Debug>& state) {
{
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+ float24 rsq_res = float24::FromFloat32(1.0f / std::sqrt(src1[0].ToFloat32()));
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
- // TODO: Be stable against division by zero!
- // TODO: I think this might be wrong... we should only use one component here
- dest[i] = float24::FromFloat32(1.0f / sqrt(src1[i].ToFloat32()));
+ dest[i] = rsq_res;
}
Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
break;
@@ -278,6 +289,20 @@ void RunInterpreter(UnitState<Debug>& state) {
break;
}
+ case OpCode::Id::SGE:
+ case OpCode::Id::SGEI:
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
+ Record<DebugDataRecord::SRC2>(state.debug, iteration, src2);
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = (src1[i] >= src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f);
+ }
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
+ break;
+
case OpCode::Id::SLT:
case OpCode::Id::SLTI:
Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
@@ -334,6 +359,42 @@ void RunInterpreter(UnitState<Debug>& state) {
Record<DebugDataRecord::CMP_RESULT>(state.debug, iteration, state.conditional_code);
break;
+ case OpCode::Id::EX2:
+ {
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+
+ // EX2 only takes first component exp2 and writes it to all dest components
+ float24 ex2_res = float24::FromFloat32(std::exp2(src1[0].ToFloat32()));
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = ex2_res;
+ }
+
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
+ break;
+ }
+
+ case OpCode::Id::LG2:
+ {
+ Record<DebugDataRecord::SRC1>(state.debug, iteration, src1);
+ Record<DebugDataRecord::DEST_IN>(state.debug, iteration, dest);
+
+ // LG2 only takes the first component log2 and writes it to all dest components
+ float24 lg2_res = float24::FromFloat32(std::log2(src1[0].ToFloat32()));
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = lg2_res;
+ }
+
+ Record<DebugDataRecord::DEST_OUT>(state.debug, iteration, dest);
+ break;
+ }
+
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
(int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 836942c6b..d3cfe109e 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -23,14 +23,14 @@ const JitFunction instr_table[64] = {
&JitCompiler::Compile_ADD, // add
&JitCompiler::Compile_DP3, // dp3
&JitCompiler::Compile_DP4, // dp4
- nullptr, // dph
+ &JitCompiler::Compile_DPH, // dph
nullptr, // unknown
- nullptr, // ex2
- nullptr, // lg2
+ &JitCompiler::Compile_EX2, // ex2
+ &JitCompiler::Compile_LG2, // lg2
nullptr, // unknown
&JitCompiler::Compile_MUL, // mul
- nullptr, // lge
- nullptr, // slt
+ &JitCompiler::Compile_SGE, // sge
+ &JitCompiler::Compile_SLT, // slt
&JitCompiler::Compile_FLR, // flr
&JitCompiler::Compile_MAX, // max
&JitCompiler::Compile_MIN, // min
@@ -44,10 +44,10 @@ const JitFunction instr_table[64] = {
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
- nullptr, // dphi
+ &JitCompiler::Compile_DPH, // dphi
nullptr, // unknown
- nullptr, // sgei
- &JitCompiler::Compile_SLTI, // slti
+ &JitCompiler::Compile_SGE, // sgei
+ &JitCompiler::Compile_SLT, // slti
nullptr, // unknown
nullptr, // unknown
nullptr, // unknown
@@ -115,6 +115,8 @@ static const X64Reg SRC1 = XMM1;
static const X64Reg SRC2 = XMM2;
/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
static const X64Reg SRC3 = XMM3;
+/// Additional scratch register
+static const X64Reg SCRATCH2 = XMM4;
/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
static const X64Reg ONE = XMM14;
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
@@ -227,8 +229,8 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
BLENDPS(SCRATCH, R(src), mask);
} else {
- MOVAPS(XMM4, R(src));
- UNPCKHPS(XMM4, R(SCRATCH)); // Unpack X/Y components of source and destination
+ MOVAPS(SCRATCH2, R(src));
+ UNPCKHPS(SCRATCH2, R(SCRATCH)); // Unpack X/Y components of source and destination
UNPCKLPS(SCRATCH, R(src)); // Unpack Z/W components of source and destination
// Compute selector to selectively copy source components to destination for SHUFPS instruction
@@ -236,7 +238,7 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
- SHUFPS(SCRATCH, R(XMM4), sel);
+ SHUFPS(SCRATCH, R(SCRATCH2), sel);
}
// Store dest back to memory
@@ -244,6 +246,19 @@ void JitCompiler::Compile_DestEnable(Instruction instr,X64Reg src) {
}
}
+void JitCompiler::Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch) {
+ MOVAPS(scratch, R(src1));
+ CMPPS(scratch, R(src2), CMP_ORD);
+
+ MULPS(src1, R(src2));
+
+ MOVAPS(src2, R(src1));
+ CMPPS(src2, R(src2), CMP_UNORD);
+
+ XORPS(scratch, R(src2));
+ ANDPS(src1, R(scratch));
+}
+
void JitCompiler::Compile_EvaluateCondition(Instruction instr) {
// Note: NXOR is used below to check for equality
switch (instr.flow_control.op) {
@@ -280,6 +295,22 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
}
+void JitCompiler::Compile_PushCallerSavedXMM() {
+#ifndef _WIN32
+ SUB(64, R(RSP), Imm8(2 * 16));
+ MOVUPS(MDisp(RSP, 16), ONE);
+ MOVUPS(MDisp(RSP, 0), NEGBIT);
+#endif
+}
+
+void JitCompiler::Compile_PopCallerSavedXMM() {
+#ifndef _WIN32
+ MOVUPS(NEGBIT, MDisp(RSP, 0));
+ MOVUPS(ONE, MDisp(RSP, 16));
+ ADD(64, R(RSP), Imm8(2 * 16));
+#endif
+}
+
void JitCompiler::Compile_ADD(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
@@ -291,21 +322,17 @@ void JitCompiler::Compile_DP3(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- if (Common::GetCPUCaps().sse4_1) {
- DPPS(SRC1, R(SRC2), 0x7f);
- } else {
- MULPS(SRC1, R(SRC2));
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- MOVAPS(SRC2, R(SRC1));
- SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC2, R(SRC2), _MM_SHUFFLE(1, 1, 1, 1));
- MOVAPS(SRC3, R(SRC1));
- SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
+ MOVAPS(SRC3, R(SRC1));
+ SHUFPS(SRC3, R(SRC3), _MM_SHUFFLE(2, 2, 2, 2));
- SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
- ADDPS(SRC1, R(SRC2));
- ADDPS(SRC1, R(SRC3));
- }
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0));
+ ADDPS(SRC1, R(SRC2));
+ ADDPS(SRC1, R(SRC3));
Compile_DestEnable(instr, SRC1);
}
@@ -314,27 +341,117 @@ void JitCompiler::Compile_DP4(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
+ ADDPS(SRC1, R(SRC2));
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ ADDPS(SRC1, R(SRC2));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_DPH(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
if (Common::GetCPUCaps().sse4_1) {
- DPPS(SRC1, R(SRC2), 0xff);
+ // Set 4th component to 1.0
+ BLENDPS(SRC1, R(ONE), 0x8); // 0b1000
} else {
- MULPS(SRC1, R(SRC2));
+ // Set 4th component to 1.0
+ MOVAPS(SCRATCH, R(SRC1));
+ UNPCKHPS(SCRATCH, R(ONE)); // XYZW, 1111 -> Z1__
+ UNPCKLPD(SRC1, R(SCRATCH)); // XYZW, Z1__ -> XYZ1
+ }
- MOVAPS(SRC2, R(SRC1));
- SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
- ADDPS(SRC1, R(SRC2));
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- MOVAPS(SRC2, R(SRC1));
- SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
- ADDPS(SRC1, R(SRC2));
- }
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
+ ADDPS(SRC1, R(SRC2));
+
+ MOVAPS(SRC2, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ ADDPS(SRC1, R(SRC2));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_EX2(Instruction instr) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ MOVSS(XMM0, R(SRC1));
+ // The following will actually break the stack alignment
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM();
+ ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+ MOVAPS(SRC1, R(XMM0));
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_LG2(Instruction instr) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ MOVSS(XMM0, R(SRC1));
+
+ // The following will actually break the stack alignment
+ ABI_PushAllCallerSavedRegsAndAdjustStack();
+ Compile_PushCallerSavedXMM();
+ ABI_CallFunction(reinterpret_cast<const void*>(log2f));
+ Compile_PopCallerSavedXMM();
+ ABI_PopAllCallerSavedRegsAndAdjustStack();
+
+ SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
+ MOVAPS(SRC1, R(XMM0));
Compile_DestEnable(instr, SRC1);
}
void JitCompiler::Compile_MUL(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- MULPS(SRC1, R(SRC2));
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_SGE(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ CMPPS(SRC1, R(SRC2), CMP_NLT);
+ ANDPS(SRC1, R(ONE));
+
+ Compile_DestEnable(instr, SRC1);
+}
+
+void JitCompiler::Compile_SLT(Instruction instr) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+
+ CMPPS(SRC1, R(SRC2), CMP_LT);
+ ANDPS(SRC1, R(ONE));
+
Compile_DestEnable(instr, SRC1);
}
@@ -354,6 +471,7 @@ void JitCompiler::Compile_FLR(Instruction instr) {
void JitCompiler::Compile_MAX(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
MAXPS(SRC1, R(SRC2));
Compile_DestEnable(instr, SRC1);
}
@@ -361,6 +479,7 @@ void JitCompiler::Compile_MAX(Instruction instr) {
void JitCompiler::Compile_MIN(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
MINPS(SRC1, R(SRC2));
Compile_DestEnable(instr, SRC1);
}
@@ -374,8 +493,8 @@ void JitCompiler::Compile_MOVA(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // Convert floats to integers (only care about X and Y components)
- CVTPS2DQ(SRC1, R(SRC1));
+ // Convert floats to integers using truncation (only care about X and Y components)
+ CVTTPS2DQ(SRC1, R(SRC1));
// Get result
MOVQ_xmm(R(RAX), SRC1);
@@ -415,22 +534,13 @@ void JitCompiler::Compile_MOV(Instruction instr) {
Compile_DestEnable(instr, SRC1);
}
-void JitCompiler::Compile_SLTI(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
- Compile_SwizzleSrc(instr, 1, instr.common.src2i, SRC2);
-
- CMPSS(SRC1, R(SRC2), CMP_LT);
- ANDPS(SRC1, R(ONE));
-
- Compile_DestEnable(instr, SRC1);
-}
-
void JitCompiler::Compile_RCP(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RCPPS is a pretty rough approximation, this might cause problems if Pica
+ // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
// performs this operation more accurately. This should be checked on hardware.
- RCPPS(SRC1, R(SRC1));
+ RCPSS(SRC1, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
Compile_DestEnable(instr, SRC1);
}
@@ -438,9 +548,10 @@ void JitCompiler::Compile_RCP(Instruction instr) {
void JitCompiler::Compile_RSQ(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RSQRTPS is a pretty rough approximation, this might cause problems if Pica
+ // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
// performs this operation more accurately. This should be checked on hardware.
- RSQRTPS(SRC1, R(SRC1));
+ RSQRTSS(SRC1, R(SRC1));
+ SHUFPS(SRC1, R(SRC1), _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
Compile_DestEnable(instr, SRC1);
}
@@ -475,27 +586,42 @@ void JitCompiler::Compile_CALLU(Instruction instr) {
}
void JitCompiler::Compile_CMP(Instruction instr) {
+ using Op = Instruction::Common::CompareOpType::Op;
+ Op op_x = instr.common.compare_op.x;
+ Op op_y = instr.common.compare_op.y;
+
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_NLE, CMP_NLT };
+ // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
+ // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
+ // because they don't match when used with NaNs.
+ static const u8 cmp[] = { CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE };
+
+ bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
+ Gen::X64Reg lhs_x = invert_op_x ? SRC2 : SRC1;
+ Gen::X64Reg rhs_x = invert_op_x ? SRC1 : SRC2;
- if (instr.common.compare_op.x == instr.common.compare_op.y) {
+ if (op_x == op_y) {
// Compare X-component and Y-component together
- CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.x]);
+ CMPPS(lhs_x, R(rhs_x), cmp[op_x]);
+ MOVQ_xmm(R(COND0), lhs_x);
- MOVQ_xmm(R(COND0), SRC1);
MOV(64, R(COND1), R(COND0));
} else {
+ bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual);
+ Gen::X64Reg lhs_y = invert_op_y ? SRC2 : SRC1;
+ Gen::X64Reg rhs_y = invert_op_y ? SRC1 : SRC2;
+
// Compare X-component
- MOVAPS(SCRATCH, R(SRC1));
- CMPSS(SCRATCH, R(SRC2), cmp[instr.common.compare_op.x]);
+ MOVAPS(SCRATCH, R(lhs_x));
+ CMPSS(SCRATCH, R(rhs_x), cmp[op_x]);
// Compare Y-component
- CMPPS(SRC1, R(SRC2), cmp[instr.common.compare_op.y]);
+ CMPPS(lhs_y, R(rhs_y), cmp[op_y]);
MOVQ_xmm(R(COND0), SCRATCH);
- MOVQ_xmm(R(COND1), SRC1);
+ MOVQ_xmm(R(COND1), lhs_y);
}
SHR(32, R(COND0), Imm8(31));
@@ -513,12 +639,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
}
- if (Common::GetCPUCaps().fma) {
- VFMADD213PS(SRC1, SRC2, R(SRC3));
- } else {
- MULPS(SRC1, R(SRC2));
- ADDPS(SRC1, R(SRC3));
- }
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ ADDPS(SRC1, R(SRC3));
Compile_DestEnable(instr, SRC1);
}
@@ -646,12 +768,12 @@ CompiledShader* JitCompiler::Compile() {
// Used to set a register to one
static const __m128 one = { 1.f, 1.f, 1.f, 1.f };
MOV(PTRBITS, R(RAX), ImmPtr(&one));
- MOVAPS(ONE, MDisp(RAX, 0));
+ MOVAPS(ONE, MatR(RAX));
// Used to negate registers
static const __m128 neg = { -0.f, -0.f, -0.f, -0.f };
MOV(PTRBITS, R(RAX), ImmPtr(&neg));
- MOVAPS(NEGBIT, MDisp(RAX, 0));
+ MOVAPS(NEGBIT, MatR(RAX));
looping = false;
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index b88f2a0d2..58828ecc8 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -37,7 +37,12 @@ public:
void Compile_ADD(Instruction instr);
void Compile_DP3(Instruction instr);
void Compile_DP4(Instruction instr);
+ void Compile_DPH(Instruction instr);
+ void Compile_EX2(Instruction instr);
+ void Compile_LG2(Instruction instr);
void Compile_MUL(Instruction instr);
+ void Compile_SGE(Instruction instr);
+ void Compile_SLT(Instruction instr);
void Compile_FLR(Instruction instr);
void Compile_MAX(Instruction instr);
void Compile_MIN(Instruction instr);
@@ -45,7 +50,6 @@ public:
void Compile_RSQ(Instruction instr);
void Compile_MOVA(Instruction instr);
void Compile_MOV(Instruction instr);
- void Compile_SLTI(Instruction instr);
void Compile_NOP(Instruction instr);
void Compile_END(Instruction instr);
void Compile_CALL(Instruction instr);
@@ -64,9 +68,18 @@ private:
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);
void Compile_DestEnable(Instruction instr, Gen::X64Reg dest);
+ /**
+ * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
+ * zero by inf. Clobbers `src2` and `scratch`.
+ */
+ void Compile_SanitizedMul(Gen::X64Reg src1, Gen::X64Reg src2, Gen::X64Reg scratch);
+
void Compile_EvaluateCondition(Instruction instr);
void Compile_UniformCondition(Instruction instr);
+ void Compile_PushCallerSavedXMM();
+ void Compile_PopCallerSavedXMM();
+
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
unsigned* offset_ptr = nullptr;