diff options
Diffstat (limited to '')
-rw-r--r-- | CMakeLists.txt | 9 | ||||
-rw-r--r-- | externals/CMakeLists.txt | 4 | ||||
m--------- | externals/dynarmic | 0 | ||||
-rw-r--r-- | src/core/hle/kernel/hle_ipc.cpp | 5 | ||||
-rw-r--r-- | src/core/hle/kernel/hle_ipc.h | 7 | ||||
-rw-r--r-- | src/core/hle/service/service.cpp | 3 | ||||
-rw-r--r-- | src/tests/CMakeLists.txt | 5 | ||||
-rw-r--r-- | src/tests/core/hle/kernel/hle_ipc.cpp | 193 | ||||
-rw-r--r-- | src/video_core/regs_lighting.h | 4 | ||||
-rw-r--r-- | src/video_core/regs_texturing.h | 8 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 69 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 13 | ||||
-rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.cpp | 26 | ||||
-rw-r--r-- | src/video_core/swrasterizer/rasterizer.cpp | 20 | ||||
-rw-r--r-- | src/video_core/swrasterizer/texturing.cpp | 19 |
16 files changed, 335 insertions, 52 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f0af2d41..a61dee6e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,10 +92,13 @@ else() # /W3 - Level 3 warnings # /MP - Multi-threaded compilation # /Zi - Output debugging information - # /Zo - enahnced debug info for optimized builds - set(CMAKE_C_FLAGS "/W3 /MP /Zi /Zo" CACHE STRING "" FORCE) + # /Zo - enhanced debug info for optimized builds + # /permissive- - enables stricter C++ standards conformance checks + set(CMAKE_C_FLAGS "/W3 /MP /Zi /Zo /permissive-" CACHE STRING "" FORCE) # /EHsc - C++-only exception handling semantics - set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /EHsc" CACHE STRING "" FORCE) + # /Zc:throwingNew - let codegen assume `operator new` will never return null + # /Zc:inline - let codegen omit inline functions in object files + set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /EHsc /Zc:throwingNew,inline" CACHE STRING "" FORCE) # /MDd - Multi-threaded Debug Runtime DLL set(CMAKE_C_FLAGS_DEBUG "/Od /MDd" CACHE STRING "" FORCE) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 1e04931ee..02e02350c 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -46,7 +46,5 @@ if (ARCHITECTURE_x86_64) # Defined before "dynarmic" above # add_library(xbyak INTERFACE) target_include_directories(xbyak INTERFACE ./xbyak/xbyak) - if (NOT MSVC) - target_compile_options(xbyak INTERFACE -fno-operator-names) - endif() + target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES) endif() diff --git a/externals/dynarmic b/externals/dynarmic -Subproject 7707ff13e981b0aecf87f3156ee0b641469f7bb +Subproject 8f15e3f70cb96e56705e5de6ba97b5d09423a56 diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp index 6cf1886cf..1cac1d0c9 100644 --- a/src/core/hle/kernel/hle_ipc.cpp +++ b/src/core/hle/kernel/hle_ipc.cpp @@ -23,6 +23,11 @@ void SessionRequestHandler::ClientDisconnected(SharedPtr<ServerSession> server_s boost::range::remove_erase(connected_sessions, server_session); } +HLERequestContext::HLERequestContext(SharedPtr<ServerSession> session) + : session(std::move(session)) { + cmd_buf[0] = 0; +} + HLERequestContext::~HLERequestContext() = default; SharedPtr<Object> HLERequestContext::GetIncomingHandle(u32 id_from_cmdbuf) const { diff --git a/src/core/hle/kernel/hle_ipc.h b/src/core/hle/kernel/hle_ipc.h index cbb109d8f..35795fc1d 100644 --- a/src/core/hle/kernel/hle_ipc.h +++ b/src/core/hle/kernel/hle_ipc.h @@ -84,6 +84,7 @@ protected: */ class HLERequestContext { public: + HLERequestContext(SharedPtr<ServerSession> session); ~HLERequestContext(); /// Returns a pointer to the IPC command buffer for this request. @@ -118,14 +119,14 @@ public: */ void ClearIncomingObjects(); -private: - friend class Service::ServiceFrameworkBase; - + /// Populates this context with data from the requesting process/thread. ResultCode PopulateFromIncomingCommandBuffer(const u32_le* src_cmdbuf, Process& src_process, HandleTable& src_table); + /// Writes data from this context back to the requesting process/thread. ResultCode WriteToOutgoingCommandBuffer(u32_le* dst_cmdbuf, Process& dst_process, HandleTable& dst_table) const; +private: std::array<u32, IPC::COMMAND_BUFFER_LENGTH> cmd_buf; SharedPtr<ServerSession> session; // TODO(yuriks): Check common usage of this and optimize size accordingly diff --git a/src/core/hle/service/service.cpp b/src/core/hle/service/service.cpp index 791a65c19..6754cfeea 100644 --- a/src/core/hle/service/service.cpp +++ b/src/core/hle/service/service.cpp @@ -173,8 +173,7 @@ void ServiceFrameworkBase::HandleSyncRequest(SharedPtr<ServerSession> server_ses // TODO(yuriks): The kernel should be the one handling this as part of translation after // everything else is migrated - Kernel::HLERequestContext context; - context.session = std::move(server_session); + Kernel::HLERequestContext context(std::move(server_session)); context.PopulateFromIncomingCommandBuffer(cmd_buf, *Kernel::g_current_process, Kernel::g_handle_table); diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 00d7c636a..a14df325a 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,8 +1,9 @@ set(SRCS - glad.cpp - tests.cpp common/param_package.cpp core/file_sys/path_parser.cpp + core/hle/kernel/hle_ipc.cpp + glad.cpp + tests.cpp ) set(HEADERS diff --git a/src/tests/core/hle/kernel/hle_ipc.cpp b/src/tests/core/hle/kernel/hle_ipc.cpp new file mode 100644 index 000000000..e07a28c5b --- /dev/null +++ b/src/tests/core/hle/kernel/hle_ipc.cpp @@ -0,0 +1,193 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <catch.hpp> +#include "core/hle/ipc.h" +#include "core/hle/kernel/client_port.h" +#include "core/hle/kernel/client_session.h" +#include "core/hle/kernel/event.h" +#include "core/hle/kernel/handle_table.h" +#include "core/hle/kernel/hle_ipc.h" +#include "core/hle/kernel/process.h" +#include "core/hle/kernel/server_session.h" + +namespace Kernel { + +static SharedPtr<Object> MakeObject() { + return Event::Create(ResetType::OneShot); +} + +TEST_CASE("HLERequestContext::PopoulateFromIncomingCommandBuffer", "[core][kernel]") { + auto session = std::get<SharedPtr<ServerSession>>(ServerSession::CreateSessionPair()); + HLERequestContext context(std::move(session)); + + auto process = Process::Create(CodeSet::Create("", 0)); + HandleTable handle_table; + + SECTION("works with empty cmdbuf") { + const u32_le input[]{ + IPC::MakeHeader(0x1234, 0, 0), + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + REQUIRE(context.CommandBuffer()[0] == 0x12340000); + } + + SECTION("translates regular params") { + const u32_le input[]{ + IPC::MakeHeader(0, 3, 0), 0x12345678, 0x21122112, 0xAABBCCDD, + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + auto* output = context.CommandBuffer(); + REQUIRE(output[1] == 0x12345678); + REQUIRE(output[2] == 0x21122112); + REQUIRE(output[3] == 0xAABBCCDD); + } + + SECTION("translates move handles") { + auto a = MakeObject(); + Handle a_handle = handle_table.Create(a).Unwrap(); + const u32_le input[]{ + IPC::MakeHeader(0, 0, 2), IPC::MoveHandleDesc(1), a_handle, + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + auto* output = context.CommandBuffer(); + REQUIRE(context.GetIncomingHandle(output[2]) == a); + REQUIRE(handle_table.GetGeneric(a_handle) == nullptr); + } + + SECTION("translates copy handles") { + auto a = MakeObject(); + Handle a_handle = handle_table.Create(a).Unwrap(); + const u32_le input[]{ + IPC::MakeHeader(0, 0, 2), IPC::CopyHandleDesc(1), a_handle, + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + auto* output = context.CommandBuffer(); + REQUIRE(context.GetIncomingHandle(output[2]) == a); + REQUIRE(handle_table.GetGeneric(a_handle) == a); + } + + SECTION("translates multi-handle descriptors") { + auto a = MakeObject(); + auto b = MakeObject(); + auto c = MakeObject(); + const u32_le input[]{ + IPC::MakeHeader(0, 0, 5), IPC::MoveHandleDesc(2), + handle_table.Create(a).Unwrap(), handle_table.Create(b).Unwrap(), + IPC::MoveHandleDesc(1), handle_table.Create(c).Unwrap(), + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + auto* output = context.CommandBuffer(); + REQUIRE(context.GetIncomingHandle(output[2]) == a); + REQUIRE(context.GetIncomingHandle(output[3]) == b); + REQUIRE(context.GetIncomingHandle(output[5]) == c); + } + + SECTION("translates CallingPid descriptors") { + const u32_le input[]{ + IPC::MakeHeader(0, 0, 2), IPC::CallingPidDesc(), 0x98989898, + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + REQUIRE(context.CommandBuffer()[2] == process->process_id); + } + + SECTION("translates mixed params") { + auto a = MakeObject(); + const u32_le input[]{ + IPC::MakeHeader(0, 2, 4), + 0x12345678, + 0xABCDEF00, + IPC::MoveHandleDesc(1), + handle_table.Create(a).Unwrap(), + IPC::CallingPidDesc(), + 0, + }; + + context.PopulateFromIncomingCommandBuffer(input, *process, handle_table); + + auto* output = context.CommandBuffer(); + REQUIRE(output[1] == 0x12345678); + REQUIRE(output[2] == 0xABCDEF00); + REQUIRE(context.GetIncomingHandle(output[4]) == a); + REQUIRE(output[6] == process->process_id); + } +} + +TEST_CASE("HLERequestContext::WriteToOutgoingCommandBuffer", "[core][kernel]") { + auto session = std::get<SharedPtr<ServerSession>>(ServerSession::CreateSessionPair()); + HLERequestContext context(std::move(session)); + + auto process = Process::Create(CodeSet::Create("", 0)); + HandleTable handle_table; + auto* input = context.CommandBuffer(); + u32_le output[IPC::COMMAND_BUFFER_LENGTH]; + + SECTION("works with empty cmdbuf") { + input[0] = IPC::MakeHeader(0x1234, 0, 0); + + context.WriteToOutgoingCommandBuffer(output, *process, handle_table); + + REQUIRE(output[0] == 0x12340000); + } + + SECTION("translates regular params") { + input[0] = IPC::MakeHeader(0, 3, 0); + input[1] = 0x12345678; + input[2] = 0x21122112; + input[3] = 0xAABBCCDD; + + context.WriteToOutgoingCommandBuffer(output, *process, handle_table); + + REQUIRE(output[1] == 0x12345678); + REQUIRE(output[2] == 0x21122112); + REQUIRE(output[3] == 0xAABBCCDD); + } + + SECTION("translates move/copy handles") { + auto a = MakeObject(); + auto b = MakeObject(); + input[0] = IPC::MakeHeader(0, 0, 4); + input[1] = IPC::MoveHandleDesc(1); + input[2] = context.AddOutgoingHandle(a); + input[3] = IPC::CopyHandleDesc(1); + input[4] = context.AddOutgoingHandle(b); + + context.WriteToOutgoingCommandBuffer(output, *process, handle_table); + + REQUIRE(handle_table.GetGeneric(output[2]) == a); + REQUIRE(handle_table.GetGeneric(output[4]) == b); + } + + SECTION("translates multi-handle descriptors") { + auto a = MakeObject(); + auto b = MakeObject(); + auto c = MakeObject(); + input[0] = IPC::MakeHeader(0, 0, 5); + input[1] = IPC::MoveHandleDesc(2); + input[2] = context.AddOutgoingHandle(a); + input[3] = context.AddOutgoingHandle(b); + input[4] = IPC::CopyHandleDesc(1); + input[5] = context.AddOutgoingHandle(c); + + context.WriteToOutgoingCommandBuffer(output, *process, handle_table); + + REQUIRE(handle_table.GetGeneric(output[2]) == a); + REQUIRE(handle_table.GetGeneric(output[3]) == b); + REQUIRE(handle_table.GetGeneric(output[5]) == c); + } +} + +} // namespace Kernel diff --git a/src/video_core/regs_lighting.h b/src/video_core/regs_lighting.h index fbfebc0a7..7221d1688 100644 --- a/src/video_core/regs_lighting.h +++ b/src/video_core/regs_lighting.h @@ -84,7 +84,7 @@ struct LightingRegs { NV = 2, // Cosine of the angle between the normal and the view vector LN = 3, // Cosine of the angle between the light and the normal vectors SP = 4, // Cosine of the angle between the light and the inverse spotlight vectors - CP = 5, // TODO: document and implement + CP = 5, // Cosine of the angle between the tangent and projection of half-angle vectors }; enum class LightingBumpMode : u32 { @@ -168,6 +168,8 @@ struct LightingRegs { union { BitField<0, 1, u32> directional; BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 + BitField<2, 1, u32> geometric_factor_0; + BitField<3, 1, u32> geometric_factor_1; } config; BitField<0, 20, u32> dist_atten_bias; diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 3f5355fa9..0b09f2299 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -30,10 +30,10 @@ struct TexturingRegs { Repeat = 2, MirroredRepeat = 3, // Mode 4-7 produces some weird result and may be just invalid: - // 4: Positive coord: clamp to edge; negative coord: repeat - // 5: Positive coord: clamp to border; negative coord: repeat - // 6: Repeat - // 7: Repeat + ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat + ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat + Repeat2 = 6, // Same as Repeat + Repeat3 = 7, // Same as Repeat }; enum TextureFilter : u32 { diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index db53710aa..540cbb9d0 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -73,6 +73,8 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { state.lighting.light[light_index].num = num; state.lighting.light[light_index].directional = light.config.directional != 0; state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; + state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0; + state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0; state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); state.lighting.light[light_index].spot_atten_enable = @@ -518,14 +520,16 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec3 light_vector = vec3(0.0);\n" "vec3 refl_value = vec3(0.0);\n" - "vec3 spot_dir = vec3(0.0);\n;"; + "vec3 spot_dir = vec3(0.0);\n" + "vec3 half_vector = vec3(0.0);\n" + "float geo_factor = 1.0;\n"; - // Compute fragment normals + // Compute fragment normals and tangents + const std::string pertubation = + "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { - // Bump mapping is enabled using a normal map, read perturbation vector from the selected - // texture - out += "vec3 surface_normal = 2.0 * (" + SampleTexture(config, lighting.bump_selector) + - ").rgb - 1.0;\n"; + // Bump mapping is enabled using a normal map + out += "vec3 surface_normal = " + pertubation + ";\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher // precision result @@ -534,31 +538,41 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; } + + // The tangent vector is not perturbed by the normal map and is just a unit vector. + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map - LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); - UNIMPLEMENTED(); + out += "vec3 surface_tangent = " + pertubation + ";\n"; + // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant + // computation below, which is also confirmed on 3DS. So we don't bother recomputing here + // even if 'renorm' is enabled. + + // The normal vector is not perturbed by the tangent map and is just a unit vector. + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; } else { - // No bump mapping - surface local normal is just a unit normal + // No bump mapping - surface local normal and tangent are just unit vectors out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } // Rotate the surface-local normal by the interpolated normal quaternion to convert it to // eyespace. - out += "vec3 normal = quaternion_rotate(normalize(normquat), surface_normal);\n"; + out += "vec4 normalized_normquat = normalize(normquat);\n"; + out += "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n"; + out += "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; // Gets the index into the specified lookup table for specular lighting auto GetLutIndex = [&lighting](unsigned light_num, LightingRegs::LightingLutInput input, bool abs) { - const std::string half_angle = "normalize(normalize(view) + light_vector)"; std::string index; switch (input) { case LightingRegs::LightingLutInput::NH: - index = "dot(normal, " + half_angle + ")"; + index = "dot(normal, normalize(half_vector))"; break; case LightingRegs::LightingLutInput::VH: - index = std::string("dot(normalize(view), " + half_angle + ")"); + index = std::string("dot(normalize(view), normalize(half_vector))"); break; case LightingRegs::LightingLutInput::NV: @@ -573,6 +587,22 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { index = std::string("dot(light_vector, spot_dir)"); break; + case LightingRegs::LightingLutInput::CP: + // CP input is only available with configuration 7 + if (lighting.config == LightingRegs::LightingConfig::Config7) { + // Note: even if the normal vector is modified by normal map, which is not the + // normal of the tangent plane anymore, the half angle vector is still projected + // using the modified normal vector. + std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, " + "normal) * dot(normal, normalize(half_vector))"; + // Note: the half angle vector projection is confirmed not normalized before the dot + // product. The result is in fact not cos(phi) as the name suggested. + index = "dot(" + half_angle_proj + ", tangent)"; + } else { + index = "0.0"; + } + break; + default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); UNIMPLEMENTED(); @@ -610,6 +640,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "light_vector = normalize(" + light_src + ".position + view);\n"; out += "spot_dir = " + light_src + ".spot_direction;\n"; + out += "half_vector = normalize(view) + light_vector;\n"; // Compute dot product of light_vector and normal, adjust if lighting is one-sided or // two-sided @@ -643,6 +674,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { + out += "geo_factor = dot(half_vector, half_vector);\n" + "geo_factor = geo_factor == 0.0 ? 0.0 : min(" + + dot_product + " / geo_factor, 1.0);\n"; + } + // Specular 0 component std::string d0_lut_value = "1.0"; if (lighting.lut_d0.enable && @@ -655,6 +692,9 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { GetLutValue(LightingRegs::LightingSampler::Distribution0, index) + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; + if (light_config.geometric_factor_0) { + specular_0 = "(" + specular_0 + " * geo_factor)"; + } // If enabled, lookup ReflectRed value, otherwise, 1.0 is used if (lighting.lut_rr.enable && @@ -710,6 +750,9 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; + if (light_config.geometric_factor_1) { + specular_1 = "(" + specular_1 + " * geo_factor)"; + } // Fresnel if (lighting.lut_fr.enable && diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 9c90eadf9..2302ae453 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -94,6 +94,8 @@ union PicaShaderConfig { bool two_sided_diffuse; bool dist_atten_enable; bool spot_atten_enable; + bool geometric_factor_0; + bool geometric_factor_1; } light[8]; bool enable; diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 93d7b0b71..70298e211 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -55,6 +55,12 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder GL_REPEAT, // WrapMode::Repeat GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat + // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the + // comments in enum WrapMode. + GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge2 + GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder2 + GL_REPEAT, // WrapMode::Repeat2 + GL_REPEAT, // WrapMode::Repeat3 }; // Range check table for input @@ -65,6 +71,13 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { return GL_CLAMP_TO_EDGE; } + if (static_cast<u32>(mode) > 3) { + // It is still unclear whether mode 4-7 are valid, so log it if a game uses them. + // TODO(wwylele): telemetry should be added here so we can collect more info about which + // game uses this. + LOG_WARNING(Render_OpenGL, "Using texture wrap mode %u", static_cast<u32>(mode)); + } + GLenum gl_mode = wrap_mode_table[mode]; // Check for dummy values indicating an unknown mode diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 5d9b6448c..42a57aab1 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -321,27 +321,27 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { case Instruction::FlowControlType::Or: mov(eax, COND0); mov(ebx, COND1); - xor(eax, (instr.flow_control.refx.Value() ^ 1)); - xor(ebx, (instr.flow_control.refy.Value() ^ 1)); - or (eax, ebx); + xor_(eax, (instr.flow_control.refx.Value() ^ 1)); + xor_(ebx, (instr.flow_control.refy.Value() ^ 1)); + or_(eax, ebx); break; case Instruction::FlowControlType::And: mov(eax, COND0); mov(ebx, COND1); - xor(eax, (instr.flow_control.refx.Value() ^ 1)); - xor(ebx, (instr.flow_control.refy.Value() ^ 1)); - and(eax, ebx); + xor_(eax, (instr.flow_control.refx.Value() ^ 1)); + xor_(ebx, (instr.flow_control.refy.Value() ^ 1)); + and_(eax, ebx); break; case Instruction::FlowControlType::JustX: mov(eax, COND0); - xor(eax, (instr.flow_control.refx.Value() ^ 1)); + xor_(eax, (instr.flow_control.refx.Value() ^ 1)); break; case Instruction::FlowControlType::JustY: mov(eax, COND1); - xor(eax, (instr.flow_control.refy.Value() ^ 1)); + xor_(eax, (instr.flow_control.refy.Value() ^ 1)); break; } } @@ -734,10 +734,10 @@ void JitShader::Compile_LOOP(Instruction instr) { mov(LOOPCOUNT, dword[SETUP + offset]); mov(LOOPCOUNT_REG, LOOPCOUNT); shr(LOOPCOUNT_REG, 4); - and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start + and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start mov(LOOPINC, LOOPCOUNT); shr(LOOPINC, 12); - and(LOOPINC, 0xFF0); // Z-component is the incrementer + and_(LOOPINC, 0xFF0); // Z-component is the incrementer movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count add(LOOPCOUNT, 1); // Iteration count is X-component + 1 @@ -858,9 +858,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_ mov(STATE, ABI_PARAM2); // Zero address/loop registers - xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); - xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); - xor(LOOPCOUNT_REG, LOOPCOUNT_REG); + xor_(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); + xor_(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); + xor_(LOOPCOUNT_REG, LOOPCOUNT_REG); // Used to set a register to one static const __m128 one = {1.f, 1.f, 1.f, 1.f}; diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 8b7b1defb..cd7b6c39d 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -357,10 +357,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) .ToFloat32(); - if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && - (s < 0 || static_cast<u32>(s) >= texture.config.width)) || - (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && - (t < 0 || static_cast<u32>(t) >= texture.config.height))) { + bool use_border_s = false; + bool use_border_t = false; + + if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) { + use_border_s = s < 0 || s >= static_cast<int>(texture.config.width); + } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) { + use_border_s = s >= static_cast<int>(texture.config.width); + } + + if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) { + use_border_t = t < 0 || t >= static_cast<int>(texture.config.height); + } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) { + use_border_t = t >= static_cast<int>(texture.config.height); + } + + if (use_border_s || use_border_t) { auto border_color = texture.config.border_color; texture_color[i] = {border_color.r, border_color.g, border_color.b, border_color.a}; diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp index aeb6aeb8c..4f02b93f2 100644 --- a/src/video_core/swrasterizer/texturing.cpp +++ b/src/video_core/swrasterizer/texturing.cpp @@ -18,22 +18,33 @@ using TevStageConfig = TexturingRegs::TevStageConfig; int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { switch (mode) { + case TexturingRegs::TextureConfig::ClampToEdge2: + // For negative coordinate, ClampToEdge2 behaves the same as Repeat + if (val < 0) { + return static_cast<int>(static_cast<unsigned>(val) % size); + } + // [[fallthrough]] case TexturingRegs::TextureConfig::ClampToEdge: val = std::max(val, 0); - val = std::min(val, (int)size - 1); + val = std::min(val, static_cast<int>(size) - 1); return val; case TexturingRegs::TextureConfig::ClampToBorder: return val; + case TexturingRegs::TextureConfig::ClampToBorder2: + // For ClampToBorder2, the case of positive coordinate beyond the texture size is already + // handled outside. Here we only handle the negative coordinate in the same way as Repeat. + case TexturingRegs::TextureConfig::Repeat2: + case TexturingRegs::TextureConfig::Repeat3: case TexturingRegs::TextureConfig::Repeat: - return (int)((unsigned)val % size); + return static_cast<int>(static_cast<unsigned>(val) % size); case TexturingRegs::TextureConfig::MirroredRepeat: { - unsigned int coord = ((unsigned)val % (2 * size)); + unsigned int coord = (static_cast<unsigned>(val) % (2 * size)); if (coord >= size) coord = 2 * size - 1 - coord; - return (int)coord; + return static_cast<int>(coord); } default: |