diff options
-rw-r--r-- | CMakeLists.txt | 11 | ||||
-rw-r--r-- | externals/cmake-modules/FindSDL2.cmake | 239 | ||||
-rw-r--r-- | src/core/hle/kernel/address_arbiter.cpp | 53 | ||||
-rw-r--r-- | src/core/hle/kernel/address_arbiter.h | 2 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 4 | ||||
-rw-r--r-- | src/core/hle/service/audio/hwopus.cpp | 6 | ||||
-rw-r--r-- | src/core/hle/service/bcat/backend/backend.cpp | 4 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 91 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.h | 23 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 4 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 16 | ||||
-rw-r--r-- | src/video_core/gpu.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 1 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 1 | ||||
-rw-r--r-- | src/video_core/shader/decode/arithmetic_integer.cpp | 4 | ||||
-rw-r--r-- | src/video_core/shader/decode/bfi.cpp | 2 |
17 files changed, 127 insertions, 337 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index dc782e252..44ed4196d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,15 +151,16 @@ if (ENABLE_SDL2) set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers") set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library") set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll") - else() - find_package(SDL2 REQUIRED) - endif() - if (SDL2_FOUND) - # TODO(yuriks): Make FindSDL2.cmake export an IMPORTED library instead add_library(SDL2 INTERFACE) target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARY}") target_include_directories(SDL2 INTERFACE "${SDL2_INCLUDE_DIR}") + else() + find_package(SDL2 REQUIRED) + include_directories(${SDL2_INCLUDE_DIRS}) + + add_library(SDL2 INTERFACE) + target_link_libraries(SDL2 INTERFACE "${SDL2_LIBRARIES}") endif() else() set(SDL2_FOUND NO) diff --git a/externals/cmake-modules/FindSDL2.cmake b/externals/cmake-modules/FindSDL2.cmake deleted file mode 100644 index 22ce752c5..000000000 --- a/externals/cmake-modules/FindSDL2.cmake +++ /dev/null @@ -1,239 +0,0 @@ - -# This module defines -# SDL2_LIBRARY, the name of the library to link against -# SDL2_FOUND, if false, do not try to link to SDL2 -# SDL2_INCLUDE_DIR, where to find SDL.h -# SDL2_DLL_DIR, where to find SDL2.dll if it exists -# -# This module responds to the the flag: -# SDL2_BUILDING_LIBRARY -# If this is defined, then no SDL2main will be linked in because -# only applications need main(). -# Otherwise, it is assumed you are building an application and this -# module will attempt to locate and set the the proper link flags -# as part of the returned SDL2_LIBRARY variable. -# -# Don't forget to include SDLmain.h and SDLmain.m your project for the -# OS X framework based version. (Other versions link to -lSDL2main which -# this module will try to find on your behalf.) Also for OS X, this -# module will automatically add the -framework Cocoa on your behalf. -# -# -# Additional Note: If you see an empty SDL2_LIBRARY_TEMP in your configuration -# and no SDL2_LIBRARY, it means CMake did not find your SDL2 library -# (SDL2.dll, libsdl2.so, SDL2.framework, etc). -# Set SDL2_LIBRARY_TEMP to point to your SDL2 library, and configure again. -# Similarly, if you see an empty SDL2MAIN_LIBRARY, you should set this value -# as appropriate. These values are used to generate the final SDL2_LIBRARY -# variable, but when these values are unset, SDL2_LIBRARY does not get created. -# -# -# $SDL2DIR is an environment variable that would -# correspond to the ./configure --prefix=$SDL2DIR -# used in building SDL2. -# l.e.galup 9-20-02 -# -# Modified by Eric Wing. -# Added code to assist with automated building by using environmental variables -# and providing a more controlled/consistent search behavior. -# Added new modifications to recognize OS X frameworks and -# additional Unix paths (FreeBSD, etc). -# Also corrected the header search path to follow "proper" SDL guidelines. -# Added a search for SDL2main which is needed by some platforms. -# Added a search for threads which is needed by some platforms. -# Added needed compile switches for MinGW. -# -# On OSX, this will prefer the Framework version (if found) over others. -# People will have to manually change the cache values of -# SDL2_LIBRARY to override this selection or set the CMake environment -# CMAKE_INCLUDE_PATH to modify the search paths. -# -# Note that the header path has changed from SDL2/SDL.h to just SDL.h -# This needed to change because "proper" SDL convention -# is #include "SDL.h", not <SDL2/SDL.h>. This is done for portability -# reasons because not all systems place things in SDL2/ (see FreeBSD). - -#============================================================================= -# Copyright 2003-2009 Kitware, Inc. -# -# Distributed under the OSI-approved BSD License (the "License"). -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# CMake - Cross Platform Makefile Generator -# Copyright 2000-2016 Kitware, Inc. -# Copyright 2000-2011 Insight Software Consortium -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the names of Kitware, Inc., the Insight Software Consortium, -# nor the names of their contributors may be used to endorse or promote -# products derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# ------------------------------------------------------------------------------ -# -# The above copyright and license notice applies to distributions of -# CMake in source and binary form. Some source files contain additional -# notices of original copyright by their contributors; see each source -# for details. Third-party software packages supplied with CMake under -# compatible licenses provide their own copyright notices documented in -# corresponding subdirectories. -# -# ------------------------------------------------------------------------------ -# -# CMake was initially developed by Kitware with the following sponsorship: -# -# * National Library of Medicine at the National Institutes of Health -# as part of the Insight Segmentation and Registration Toolkit (ITK). -# -# * US National Labs (Los Alamos, Livermore, Sandia) ASC Parallel -# Visualization Initiative. -# -# * National Alliance for Medical Image Computing (NAMIC) is funded by the -# National Institutes of Health through the NIH Roadmap for Medical Research, -# Grant U54 EB005149. -# -# * Kitware, Inc. -# - -message("<FindSDL2.cmake>") - -SET(SDL2_SEARCH_PATHS - ~/Library/Frameworks - /Library/Frameworks - /usr/local - /usr - /sw # Fink - /opt/local # DarwinPorts - /opt/csw # Blastwave - /opt - ${SDL2_PATH} -) - -if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(VC_LIB_PATH_SUFFIX lib/x64) -else() - set(VC_LIB_PATH_SUFFIX lib/x86) -endif() - -FIND_LIBRARY(SDL2_LIBRARY_TEMP - NAMES SDL2 - HINTS - $ENV{SDL2DIR} - PATH_SUFFIXES lib64 lib ${VC_LIB_PATH_SUFFIX} - PATHS ${SDL2_SEARCH_PATHS} -) - -IF(SDL2_LIBRARY_TEMP) - if(MSVC) - get_filename_component(SDL2_DLL_DIR_TEMP ${SDL2_LIBRARY_TEMP} DIRECTORY) - if(EXISTS ${SDL2_DLL_DIR_TEMP}/SDL2.dll) - set(SDL2_DLL_DIR ${SDL2_DLL_DIR_TEMP}) - unset(SDL2_DLL_DIR_TEMP) - endif() - endif() - - FIND_PATH(SDL2_INCLUDE_DIR SDL.h - HINTS - $ENV{SDL2DIR} - PATH_SUFFIXES include/SDL2 include - PATHS ${SDL2_SEARCH_PATHS} - ) - - IF(NOT SDL2_BUILDING_LIBRARY) - IF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework") - # Non-OS X framework versions expect you to also dynamically link to - # SDL2main. This is mainly for Windows and OS X. Other (Unix) platforms - # seem to provide SDL2main for compatibility even though they don't - # necessarily need it. - FIND_LIBRARY(SDL2MAIN_LIBRARY - NAMES SDL2main - HINTS - $ENV{SDL2DIR} - PATH_SUFFIXES lib64 lib - PATHS ${SDL2_SEARCH_PATHS} - ) - ENDIF(NOT ${SDL2_INCLUDE_DIR} MATCHES ".framework") - ENDIF(NOT SDL2_BUILDING_LIBRARY) - - # SDL2 may require threads on your system. - # The Apple build may not need an explicit flag because one of the - # frameworks may already provide it. - # But for non-OSX systems, I will use the CMake Threads package. - IF(NOT APPLE) - FIND_PACKAGE(Threads) - ENDIF(NOT APPLE) - - # MinGW needs an additional library, mwindows - # It's total link flags should look like -lmingw32 -lSDL2main -lSDL2 -lmwindows - # (Actually on second look, I think it only needs one of the m* libraries.) - IF(MINGW) - SET(MINGW32_LIBRARY mingw32 CACHE STRING "mwindows for MinGW") - ENDIF(MINGW) - - # For SDL2main - IF(NOT SDL2_BUILDING_LIBRARY) - IF(SDL2MAIN_LIBRARY) - SET(SDL2_LIBRARY_TEMP ${SDL2MAIN_LIBRARY} ${SDL2_LIBRARY_TEMP}) - ENDIF(SDL2MAIN_LIBRARY) - ENDIF(NOT SDL2_BUILDING_LIBRARY) - - # For OS X, SDL2 uses Cocoa as a backend so it must link to Cocoa. - # CMake doesn't display the -framework Cocoa string in the UI even - # though it actually is there if I modify a pre-used variable. - # I think it has something to do with the CACHE STRING. - # So I use a temporary variable until the end so I can set the - # "real" variable in one-shot. - IF(APPLE) - SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} "-framework Cocoa") - ENDIF(APPLE) - - # For threads, as mentioned Apple doesn't need this. - # In fact, there seems to be a problem if I used the Threads package - # and try using this line, so I'm just skipping it entirely for OS X. - IF(NOT APPLE) - SET(SDL2_LIBRARY_TEMP ${SDL2_LIBRARY_TEMP} ${CMAKE_THREAD_LIBS_INIT}) - ENDIF(NOT APPLE) - - # For MinGW library - IF(MINGW) - SET(SDL2_LIBRARY_TEMP ${MINGW32_LIBRARY} ${SDL2_LIBRARY_TEMP}) - ENDIF(MINGW) - - # Set the final string here so the GUI reflects the final state. - SET(SDL2_LIBRARY ${SDL2_LIBRARY_TEMP} CACHE STRING "Where the SDL2 Library can be found") - - # Unset the temp variable to INTERNAL so it is not seen in the CMake GUI - UNSET(SDL2_LIBRARY_TEMP) -ENDIF(SDL2_LIBRARY_TEMP) - -message("</FindSDL2.cmake>") - -INCLUDE(FindPackageHandleStandardArgs) - -FIND_PACKAGE_HANDLE_STANDARD_ARGS(SDL2 REQUIRED_VARS SDL2_LIBRARY SDL2_INCLUDE_DIR) diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 2ea3dcb61..8475b698c 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread->GetPriority() >= thread->GetPriority()) { - thread_list.insert(it, thread); - return; - } - ++it; + + const auto iter = + std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { + return entry->GetPriority() >= thread->GetPriority(); + }); + + if (iter == thread_list.cend()) { + thread_list.push_back(std::move(thread)); + } else { + thread_list.insert(iter, std::move(thread)); } - thread_list.push_back(std::move(thread)); } void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { const VAddr arb_addr = thread->GetArbiterWaitAddress(); std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - const std::shared_ptr<Thread>& current_thread = *it; - if (current_thread.get() == thread.get()) { - thread_list.erase(it); - return; - } - ++it; - } - UNREACHABLE(); + + const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), + [&thread](const auto& entry) { return thread == entry; }); + + ASSERT(iter != thread_list.cend()); + + thread_list.erase(iter); } -std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { - std::vector<std::shared_ptr<Thread>> result; - std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; - auto it = thread_list.begin(); - while (it != thread_list.end()) { - std::shared_ptr<Thread> current_thread = *it; - result.push_back(std::move(current_thread)); - ++it; +std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( + VAddr address) const { + const auto iter = arb_threads.find(address); + if (iter == arb_threads.cend()) { + return {}; } - return result; + + const std::list<std::shared_ptr<Thread>>& thread_list = iter->second; + return {thread_list.cbegin(), thread_list.cend()}; } } // namespace Kernel diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 386983e54..f958eee5a 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h @@ -86,7 +86,7 @@ private: void RemoveThread(std::shared_ptr<Thread> thread); // Gets the threads waiting on an address. - std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); + std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; /// List of threads waiting for a address arbiter std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 43b30dd3d..ae5f2c8bd 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -481,7 +481,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); } @@ -513,7 +513,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index cb839e4a2..d19513cbb 100644 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -170,8 +170,10 @@ public: {3, nullptr, "SetContextForMultiStream"}, {4, &IHardwareOpusDecoderManager::DecodeInterleavedWithPerfOld, "DecodeInterleavedWithPerfOld"}, {5, nullptr, "DecodeInterleavedForMultiStreamWithPerfOld"}, - {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, - {7, nullptr, "DecodeInterleavedForMultiStream"}, + {6, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleavedWithPerfAndResetOld"}, + {7, nullptr, "DecodeInterleavedForMultiStreamWithPerfAndResetOld"}, + {8, &IHardwareOpusDecoderManager::DecodeInterleaved, "DecodeInterleaved"}, + {9, nullptr, "DecodeInterleavedForMultiStream"}, }; // clang-format on diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 6f5ea095a..def3410cc 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp @@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name, } bool NullBackend::Clear(u64 title_id) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}"); + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id); return true; } void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { - LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id, + LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id, Common::HexToString(passphrase)); } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,6 +9,7 @@ #include "core/core_timing.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/textures/texture.h" @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() { regs.reg_array[0xd00] = 1; } -void Maxwell3D::ProcessQueryGet() { +void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { + struct LongQueryResult { + u64_le value; + u64_le timestamp; + }; + static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); const GPUVAddr sequence_address{regs.query.QueryAddress()}; - // Since the sequence address is given as a GPU VAddr, we have to convert it to an application - // VAddr before writing. + if (long_query) { + // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast + // GPU, this command may actually take a while to complete in real hardware due to GPU + // wait queues. + LongQueryResult query_result{payload, system.GPU().GetTicks()}; + memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + } else { + memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); + } +} +void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, "Units other than CROP are unimplemented"); - u64 result = 0; - - // TODO(Subv): Support the other query variables - switch (regs.query.query_get.select) { - case Regs::QuerySelect::Zero: - // This seems to actually write the query sequence to the query address. - result = regs.query.query_sequence; + switch (regs.query.query_get.operation) { + case Regs::QueryOperation::Release: { + const u64 result = regs.query.query_sequence; + StampQueryResult(result, regs.query.query_get.short_query == 0); break; - default: - result = 1; - UNIMPLEMENTED_MSG("Unimplemented query select type {}", - static_cast<u32>(regs.query.query_get.select.Value())); } - - // TODO(Subv): Research and implement how query sync conditions work. - - struct LongQueryResult { - u64_le value; - u64_le timestamp; - }; - static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); - - switch (regs.query.query_get.mode) { - case Regs::QueryMode::Write: - case Regs::QueryMode::Write2: { - u32 sequence = regs.query.query_sequence; - if (regs.query.query_get.short_query) { - // Write the current query sequence to the sequence address. - // TODO(Subv): Find out what happens if you use a long query type but mark it as a short - // query. - memory_manager.Write<u32>(sequence_address, sequence); - } else { - // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast - // GPU, this command may actually take a while to complete in real hardware due to GPU - // wait queues. - LongQueryResult query_result{}; - query_result.value = result; - // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming - query_result.timestamp = system.CoreTiming().GetTicks(); - memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); + case Regs::QueryOperation::Acquire: { + // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU + // to write a value that matches the current payload. + UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); + break; + } + case Regs::QueryOperation::Counter: { + u64 result{}; + switch (regs.query.query_get.select) { + case Regs::QuerySelect::Zero: + result = 0; + break; + default: + result = 1; + UNIMPLEMENTED_MSG("Unimplemented query select type {}", + static_cast<u32>(regs.query.query_get.select.Value())); } + StampQueryResult(result, regs.query.query_get.short_query == 0); + break; + } + case Regs::QueryOperation::Trap: { + UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); + break; + } + default: { + UNIMPLEMENTED_MSG("Unknown query operation"); break; } - default: - UNIMPLEMENTED_MSG("Query mode {} not implemented", - static_cast<u32>(regs.query.query_get.mode.Value())); } } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index e437bacb7..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -71,12 +71,11 @@ public: static constexpr std::size_t MaxConstBuffers = 18; static constexpr std::size_t MaxConstBufferSize = 0x10000; - enum class QueryMode : u32 { - Write = 0, - Sync = 1, - // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 - // is. - Write2 = 2, + enum class QueryOperation : u32 { + Release = 0, + Acquire = 1, + Counter = 2, + Trap = 3, }; enum class QueryUnit : u32 { @@ -862,7 +861,11 @@ public: float point_size; - INSERT_UNION_PADDING_WORDS(0x7); + INSERT_UNION_PADDING_WORDS(0x1); + + u32 point_sprite_enable; + + INSERT_UNION_PADDING_WORDS(0x5); u32 zeta_enable; @@ -1077,7 +1080,7 @@ public: u32 query_sequence; union { u32 raw; - BitField<0, 2, QueryMode> mode; + BitField<0, 2, QueryOperation> operation; BitField<4, 1, u32> fence; BitField<12, 4, QueryUnit> unit; BitField<16, 1, QuerySyncCondition> sync_cond; @@ -1409,6 +1412,9 @@ private: /// Handles a write to the QUERY_GET register. void ProcessQueryGet(); + // Writes the query result accordingly + void StampQueryResult(u64 payload, bool long_query); + // Handles Conditional Rendering void ProcessQueryCondition(); @@ -1494,6 +1500,7 @@ ASSERT_REG_POSITION(vb_element_base, 0x50D); ASSERT_REG_POSITION(vb_base_instance, 0x50E); ASSERT_REG_POSITION(clip_distance_enabled, 0x544); ASSERT_REG_POSITION(point_size, 0x546); +ASSERT_REG_POSITION(point_sprite_enable, 0x548); ASSERT_REG_POSITION(zeta_enable, 0x54E); ASSERT_REG_POSITION(multisample_control, 0x54F); ASSERT_REG_POSITION(condition, 0x554); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 402869fde..c9bc83cd7 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1677,11 +1677,11 @@ union Instruction { } xmad; union { - BitField<20, 14, u64> offset; + BitField<20, 14, u64> shifted_offset; BitField<34, 5, u64> index; u64 GetOffset() const { - return offset * 4; + return shifted_offset * 4; } } cbuf34; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,6 +6,7 @@ #include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/memory.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } +u64 GPU::GetTicks() const { + // This values were reversed engineered by fincs from NVN + // The gpu clock is reported in units of 385/625 nanoseconds + constexpr u64 gpu_ticks_num = 384; + constexpr u64 gpu_ticks_den = 625; + + const u64 cpu_ticks = system.CoreTiming().GetTicks(); + const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; + const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; + return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; +} + void GPU::FlushCommands() { renderer.Rasterizer().FlushCommands(); } @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { block.sequence = regs.semaphore_sequence; // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of // CoreTiming - block.timestamp = system.CoreTiming().GetTicks(); + block.timestamp = GetTicks(); memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -192,6 +192,8 @@ public: bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); + u64 GetTicks() const; + std::unique_lock<std::mutex> LockSync() { return std::unique_lock{sync_mutex}; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 46a7433ea..b0eb14c8b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1220,6 +1220,7 @@ void RasterizerOpenGL::SyncPointState() { // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). state.point.program_control = regs.vp_point_size.enable != 0; + state.point.sprite = regs.point_sprite_enable != 0; state.point.size = std::max(1.0f, regs.point_size); } diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index cc185e9e1..ab1f7983c 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -128,6 +128,7 @@ void OpenGLState::ApplyClipDistances() { void OpenGLState::ApplyPointSize() { Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); + Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite); if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 678e5cd89..4953eeda2 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -132,6 +132,7 @@ public: struct { bool program_control = false; // GL_PROGRAM_POINT_SIZE + bool sprite = false; // GL_POINT_SPRITE GLfloat size = 1.0f; // GL_POINT_SIZE } point; diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp index e60875cc4..21366869d 100644 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ b/src/video_core/shader/decode/arithmetic_integer.cpp @@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { switch (opcode->get().GetId()) { case OpCode::Id::ICMP_CR: - return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), + return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_R: return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; case OpCode::Id::ICMP_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::ICMP_IMM: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp index f992bbe2a..70d1c055b 100644 --- a/src/video_core/shader/decode/bfi.cpp +++ b/src/video_core/shader/decode/bfi.cpp @@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { switch (opcode->get().GetId()) { case OpCode::Id::BFI_RC: return {GetRegister(instr.gpr39), - GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; + GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; case OpCode::Id::BFI_IMM_R: return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; default: |