diff options
Diffstat (limited to '')
45 files changed, 2153 insertions, 354 deletions
@@ -9,6 +9,8 @@ Citra is an experimental open-source Nintendo 3DS emulator/debugger written in C Citra is licensed under the GPLv2 (or any later version). Refer to the license.txt file included. Please read the [FAQ](https://github.com/citra-emu/citra/wiki/FAQ) before getting started with the project. +Check out our [website](https://citra-emu.org/)! + For development discussion, please join us @ #citra on freenode. ### Development diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cb09f3cd1..2bb411492 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,6 +4,7 @@ include_directories(.) add_subdirectory(common) add_subdirectory(core) add_subdirectory(video_core) +add_subdirectory(audio_core) if (ENABLE_GLFW) add_subdirectory(citra) endif() diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt new file mode 100644 index 000000000..b0d1c7eb6 --- /dev/null +++ b/src/audio_core/CMakeLists.txt @@ -0,0 +1,16 @@ +set(SRCS + audio_core.cpp + hle/dsp.cpp + hle/pipe.cpp + ) + +set(HEADERS + audio_core.h + hle/dsp.h + hle/pipe.h + sink.h + ) + +create_directory_groups(${SRCS} ${HEADERS}) + +add_library(audio_core STATIC ${SRCS} ${HEADERS})
\ No newline at end of file diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp new file mode 100644 index 000000000..894f46990 --- /dev/null +++ b/src/audio_core/audio_core.cpp @@ -0,0 +1,53 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "audio_core/audio_core.h" +#include "audio_core/hle/dsp.h" + +#include "core/core_timing.h" +#include "core/hle/kernel/vm_manager.h" +#include "core/hle/service/dsp_dsp.h" + +namespace AudioCore { + +// Audio Ticks occur about every 5 milliseconds. +static int tick_event; ///< CoreTiming event +static constexpr u64 audio_frame_ticks = 1310252ull; ///< Units: ARM11 cycles + +static void AudioTickCallback(u64 /*userdata*/, int cycles_late) { + if (DSP::HLE::Tick()) { + // HACK: We're not signaling the interrupts when they should be, but just firing them all off together. + // It should be only (interrupt_id = 2, channel_id = 2) that's signalled here. + // TODO(merry): Understand when the other interrupts are fired. + DSP_DSP::SignalAllInterrupts(); + } + + // Reschedule recurrent event + CoreTiming::ScheduleEvent(audio_frame_ticks - cycles_late, tick_event); +} + +/// Initialise Audio +void Init() { + DSP::HLE::Init(); + + tick_event = CoreTiming::RegisterEvent("AudioCore::tick_event", AudioTickCallback); + CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event); +} + +/// Add DSP address spaces to Process's address space. 
+void AddAddressSpace(Kernel::VMManager& address_space) { + auto r0_vma = address_space.MapBackingMemory(DSP::HLE::region0_base, reinterpret_cast<u8*>(&DSP::HLE::g_region0), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); + address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite); + + auto r1_vma = address_space.MapBackingMemory(DSP::HLE::region1_base, reinterpret_cast<u8*>(&DSP::HLE::g_region1), sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO).MoveFrom(); + address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite); +} + +/// Shutdown Audio +void Shutdown() { + CoreTiming::UnscheduleEvent(tick_event, 0); + DSP::HLE::Shutdown(); +} + +} //namespace diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h new file mode 100644 index 000000000..64c330914 --- /dev/null +++ b/src/audio_core/audio_core.h @@ -0,0 +1,26 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +namespace Kernel { +class VMManager; +} + +namespace AudioCore { + +constexpr int num_sources = 24; +constexpr int samples_per_frame = 160; ///< Samples per audio frame at native sample rate +constexpr int native_sample_rate = 32728; ///< 32kHz + +/// Initialise Audio Core +void Init(); + +/// Add DSP address spaces to a Process. +void AddAddressSpace(Kernel::VMManager& vm_manager); + +/// Shutdown Audio Core +void Shutdown(); + +} // namespace diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp new file mode 100644 index 000000000..c89356edc --- /dev/null +++ b/src/audio_core/hle/dsp.cpp @@ -0,0 +1,42 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "audio_core/hle/dsp.h" +#include "audio_core/hle/pipe.h" + +namespace DSP { +namespace HLE { + +SharedMemory g_region0; +SharedMemory g_region1; + +void Init() { + DSP::HLE::ResetPipes(); +} + +void Shutdown() { +} + +bool Tick() { + return true; +} + +SharedMemory& CurrentRegion() { + // The region with the higher frame counter is chosen unless there is wraparound. + + if (g_region0.frame_counter == 0xFFFFu && g_region1.frame_counter != 0xFFFEu) { + // Wraparound has occurred. + return g_region1; + } + + if (g_region1.frame_counter == 0xFFFFu && g_region0.frame_counter != 0xFFFEu) { + // Wraparound has occurred. + return g_region0; + } + + return (g_region0.frame_counter > g_region1.frame_counter) ? g_region0 : g_region1; +} + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h new file mode 100644 index 000000000..14c4000c6 --- /dev/null +++ b/src/audio_core/hle/dsp.h @@ -0,0 +1,502 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <type_traits> + +#include "audio_core/audio_core.h" + +#include "common/bit_field.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/swap.h" + +namespace DSP { +namespace HLE { + +// The application-accessible region of DSP memory consists of two parts. +// Both are marked as IO and have Read/Write permissions. +// +// First Region: 0x1FF50000 (Size: 0x8000) +// Second Region: 0x1FF70000 (Size: 0x8000) +// +// The DSP reads from each region alternately based on the frame counter for each region much like a +// double-buffer. The frame counter is located as the very last u16 of each region and is incremented +// each audio tick. 
+ +struct SharedMemory; + +constexpr VAddr region0_base = 0x1FF50000; +extern SharedMemory g_region0; + +constexpr VAddr region1_base = 0x1FF70000; +extern SharedMemory g_region1; + +/** + * The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from + * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian + * layout of the ARM11. Hence from the ARM11's point of view the memory space appears to be + * middle-endian. + * + * Unusually this does not appear to be an issue for floating point numbers. The DSP makes the more + * sensible choice of keeping that little-endian. There are also some exceptions such as the + * IntermediateMixSamples structure, which is little-endian. + * + * This struct implements the conversion to and from this middle-endianness. + */ +struct u32_dsp { + u32_dsp() = default; + operator u32() const { + return Convert(storage); + } + void operator=(u32 new_value) { + storage = Convert(new_value); + } +private: + static constexpr u32 Convert(u32 value) { + return (value << 16) | (value >> 16); + } + u32_le storage; +}; +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) +static_assert(std::is_trivially_copyable<u32_dsp>::value, "u32_dsp isn't trivially copyable"); +#endif + +// There are 15 structures in each memory region. 
A table of them in the order they appear in memory +// is presented below +// +// Pipe 2 # First Region DSP Address Purpose Control +// 5 0x8400 DSP Status DSP +// 9 0x8410 DSP Debug Info DSP +// 6 0x8540 Final Mix Samples DSP +// 2 0x8680 Source Status [24] DSP +// 8 0x8710 Compressor Table Application +// 4 0x9430 DSP Configuration Application +// 7 0x9492 Intermediate Mix Samples DSP + App +// 1 0x9E92 Source Configuration [24] Application +// 3 0xA792 Source ADPCM Coefficients [24] Application +// 10 0xA912 Surround Sound Related +// 11 0xAA12 Surround Sound Related +// 12 0xAAD2 Surround Sound Related +// 13 0xAC52 Surround Sound Related +// 14 0xAC5C Surround Sound Related +// 0 0xBFFF Frame Counter Application +// +// Note that the above addresses do vary slightly between audio firmwares observed; the addresses are +// not fixed in stone. The addresses above are only an exemplar; they're what this implementation +// does and provides to applications. +// +// Application requests the DSP service to convert DSP addresses into ARM11 virtual addresses using the +// ConvertProcessAddressFromDspDram service call. Applications seem to derive the addresses for the +// second region via: +// second_region_dsp_addr = first_region_dsp_addr | 0x10000 +// +// Applications maintain most of their own audio state; the memory region is used mainly for +// communication and not storage of state. +// +// In the documentation below, filter and effect transfer functions are specified in the z domain. +// (If you are more familiar with the Laplace transform, z = exp(sT). The z domain is the digital +// frequency domain, just like how the s domain is the analog frequency domain.) + +#define INSERT_PADDING_DSPWORDS(num_words) INSERT_PADDING_BYTES(2 * (num_words)) + +// GCC versions < 5.0 do not implement std::is_trivially_copyable. +// Excluding MSVC because it has weird behaviour for std::is_trivially_copyable. 
+#if (__GNUC__ >= 5) || defined(__clang__) + #define ASSERT_DSP_STRUCT(name, size) \ + static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \ + static_assert(std::is_trivially_copyable<name>::value, "DSP structure " #name " isn't trivially copyable"); \ + static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name) +#else + #define ASSERT_DSP_STRUCT(name, size) \ + static_assert(std::is_standard_layout<name>::value, "DSP structure " #name " doesn't use standard layout"); \ + static_assert(sizeof(name) == (size), "Unexpected struct size for DSP structure " #name) +#endif + +struct SourceConfiguration { + struct Configuration { + /// These dirty flags are set by the application when it updates the fields in this struct. + /// The DSP clears these each audio frame. + union { + u32_le dirty_raw; + + BitField<2, 1, u32_le> adpcm_coefficients_dirty; + BitField<3, 1, u32_le> partial_embedded_buffer_dirty; ///< Tends to be set when a looped buffer is queued. + + BitField<16, 1, u32_le> enable_dirty; + BitField<17, 1, u32_le> interpolation_dirty; + BitField<18, 1, u32_le> rate_multiplier_dirty; + BitField<19, 1, u32_le> buffer_queue_dirty; + BitField<20, 1, u32_le> loop_related_dirty; + BitField<21, 1, u32_le> play_position_dirty; ///< Tends to also be set when embedded buffer is updated. + BitField<22, 1, u32_le> filters_enabled_dirty; + BitField<23, 1, u32_le> simple_filter_dirty; + BitField<24, 1, u32_le> biquad_filter_dirty; + BitField<25, 1, u32_le> gain_0_dirty; + BitField<26, 1, u32_le> gain_1_dirty; + BitField<27, 1, u32_le> gain_2_dirty; + BitField<28, 1, u32_le> sync_dirty; + BitField<29, 1, u32_le> reset_flag; + + BitField<31, 1, u32_le> embedded_buffer_dirty; + }; + + // Gain control + + /** + * Gain is between 0.0-1.0. This determines how much will this source appear on + * each of the 12 channels that feed into the intermediate mixers. 
+ * Each of the three intermediate mixers is fed two left and two right channels. + */ + float_le gain[3][4]; + + // Interpolation + + /// Multiplier for sample rate. Resampling occurs with the selected interpolation method. + float_le rate_multiplier; + + enum class InterpolationMode : u8 { + None = 0, + Linear = 1, + Polyphase = 2 + }; + + InterpolationMode interpolation_mode; + INSERT_PADDING_BYTES(1); ///< Interpolation related + + // Filters + + /** + * This is the simplest normalized first-order digital recursive filter. + * The transfer function of this filter is: + * H(z) = b0 / (1 + a1 z^-1) + * Values are signed fixed point with 15 fractional bits. + */ + struct SimpleFilter { + s16_le b0; + s16_le a1; + }; + + /** + * This is a normalised biquad filter (second-order). + * The transfer function of this filter is: + * H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 - a1 z^-1 - a2 z^-2) + * Nintendo chose to negate the feedbackward coefficients. This differs from standard notation + * as in: https://ccrma.stanford.edu/~jos/filters/Direct_Form_I.html + * Values are signed fixed point with 14 fractional bits. + */ + struct BiquadFilter { + s16_le b0; + s16_le b1; + s16_le b2; + s16_le a1; + s16_le a2; + }; + + union { + u16_le filters_enabled; + BitField<0, 1, u16_le> simple_filter_enabled; + BitField<1, 1, u16_le> biquad_filter_enabled; + }; + + SimpleFilter simple_filter; + BiquadFilter biquad_filter; + + // Buffer Queue + + /// A buffer of audio data from the application, along with metadata about it. + struct Buffer { + /// Physical memory address of the start of the buffer + u32_dsp physical_address; + + /// This is length in terms of samples. + /// Note that in different buffer formats a sample takes up different number of bytes. 
+ u32_dsp length; + + /// ADPCM Predictor (4 bits) and Scale (4 bits) + union { + u16_le adpcm_ps; + BitField<0, 4, u16_le> adpcm_scale; + BitField<4, 4, u16_le> adpcm_predictor; + }; + + /// ADPCM Historical Samples (y[n-1] and y[n-2]) + u16_le adpcm_yn[2]; + + /// This is non-zero when the ADPCM values above are to be updated. + u8 adpcm_dirty; + + /// Is a looping buffer. + u8 is_looping; + + /// This value is shown in SourceStatus::previous_buffer_id when this buffer has finished. + /// This allows the emulated application to tell what buffer is currently playing + u16_le buffer_id; + + INSERT_PADDING_DSPWORDS(1); + }; + + u16_le buffers_dirty; ///< Bitmap indicating which buffers are dirty (bit i -> buffers[i]) + Buffer buffers[4]; ///< Queued Buffers + + // Playback controls + + u32_dsp loop_related; + u8 enable; + INSERT_PADDING_BYTES(1); + u16_le sync; ///< Application-side sync (See also: SourceStatus::sync) + u32_dsp play_position; ///< Position. (Units: number of samples) + INSERT_PADDING_DSPWORDS(2); + + // Embedded Buffer + // This buffer is often the first buffer to be used when initiating audio playback, + // after which the buffer queue is used. + + u32_dsp physical_address; + + /// This is length in terms of samples. + /// Note a sample takes up different number of bytes in different buffer formats. 
+ u32_dsp length; + + enum class MonoOrStereo : u16_le { + Mono = 1, + Stereo = 2 + }; + + enum class Format : u16_le { + PCM8 = 0, + PCM16 = 1, + ADPCM = 2 + }; + + union { + u16_le flags1_raw; + BitField<0, 2, MonoOrStereo> mono_or_stereo; + BitField<2, 2, Format> format; + BitField<5, 1, u16_le> fade_in; + }; + + /// ADPCM Predictor (4 bit) and Scale (4 bit) + union { + u16_le adpcm_ps; + BitField<0, 4, u16_le> adpcm_scale; + BitField<4, 4, u16_le> adpcm_predictor; + }; + + /// ADPCM Historical Samples (y[n-1] and y[n-2]) + u16_le adpcm_yn[2]; + + union { + u16_le flags2_raw; + BitField<0, 1, u16_le> adpcm_dirty; ///< Has the ADPCM info above been changed? + BitField<1, 1, u16_le> is_looping; ///< Is this a looping buffer? + }; + + /// Buffer id of embedded buffer (used as a buffer id in SourceStatus to reference this buffer). + u16_le buffer_id; + }; + + Configuration config[AudioCore::num_sources]; +}; +ASSERT_DSP_STRUCT(SourceConfiguration::Configuration, 192); +ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); + +struct SourceStatus { + struct Status { + u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) + u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes + u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync + u32_dsp buffer_position; ///< Number of samples into the current buffer + u16_le previous_buffer_id; ///< Updated when a buffer finishes playing + INSERT_PADDING_DSPWORDS(1); + }; + + Status status[AudioCore::num_sources]; +}; +ASSERT_DSP_STRUCT(SourceStatus::Status, 12); + +struct DspConfiguration { + /// These dirty flags are set by the application when it updates the fields in this struct. + /// The DSP clears these each audio frame. 
+ union { + u32_le dirty_raw; + + BitField<8, 1, u32_le> mixer1_enabled_dirty; + BitField<9, 1, u32_le> mixer2_enabled_dirty; + BitField<10, 1, u32_le> delay_effect_0_dirty; + BitField<11, 1, u32_le> delay_effect_1_dirty; + BitField<12, 1, u32_le> reverb_effect_0_dirty; + BitField<13, 1, u32_le> reverb_effect_1_dirty; + + BitField<16, 1, u32_le> volume_0_dirty; + + BitField<24, 1, u32_le> volume_1_dirty; + BitField<25, 1, u32_le> volume_2_dirty; + BitField<26, 1, u32_le> output_format_dirty; + BitField<27, 1, u32_le> limiter_enabled_dirty; + BitField<28, 1, u32_le> headphones_connected_dirty; + }; + + /// The DSP has three intermediate audio mixers. This controls the volume level (0.0-1.0) for each at the final mixer + float_le volume[3]; + + INSERT_PADDING_DSPWORDS(3); + + enum class OutputFormat : u16_le { + Mono = 0, + Stereo = 1, + Surround = 2 + }; + + OutputFormat output_format; + + u16_le limiter_enabled; ///< Not sure of the exact gain equation for the limiter. + u16_le headphones_connected; ///< Application updates the DSP on headphone status. + INSERT_PADDING_DSPWORDS(4); ///< TODO: Surround sound related + INSERT_PADDING_DSPWORDS(2); ///< TODO: Intermediate mixer 1/2 related + u16_le mixer1_enabled; + u16_le mixer2_enabled; + + /** + * This is delay with feedback. + * Transfer function: + * H(z) = a z^-N / (1 - b z^-1 + a g z^-N) + * where + * N = frame_count * samples_per_frame + * g, a and b are fixed point with 7 fractional bits + */ + struct DelayEffect { + /// These dirty flags are set by the application when it updates the fields in this struct. + /// The DSP clears these each audio frame. 
+ union { + u16_le dirty_raw; + BitField<0, 1, u16_le> enable_dirty; + BitField<1, 1, u16_le> work_buffer_address_dirty; + BitField<2, 1, u16_le> other_dirty; ///< Set when anything else has been changed + }; + + u16_le enable; + INSERT_PADDING_DSPWORDS(1); + u16_le outputs; + u32_dsp work_buffer_address; ///< The application allocates a block of memory for the DSP to use as a work buffer. + u16_le frame_count; ///< Frames to delay by + + // Coefficients + s16_le g; ///< Fixed point with 7 fractional bits + s16_le a; ///< Fixed point with 7 fractional bits + s16_le b; ///< Fixed point with 7 fractional bits + }; + + DelayEffect delay_effect[2]; + + struct ReverbEffect { + INSERT_PADDING_DSPWORDS(26); ///< TODO + }; + + ReverbEffect reverb_effect[2]; + + INSERT_PADDING_DSPWORDS(4); +}; +ASSERT_DSP_STRUCT(DspConfiguration, 196); +ASSERT_DSP_STRUCT(DspConfiguration::DelayEffect, 20); +ASSERT_DSP_STRUCT(DspConfiguration::ReverbEffect, 52); + +struct AdpcmCoefficients { + /// Coefficients are signed fixed point with 11 fractional bits. + /// Each source has 16 coefficients associated with it. + s16_le coeff[AudioCore::num_sources][16]; +}; +ASSERT_DSP_STRUCT(AdpcmCoefficients, 768); + +struct DspStatus { + u16_le unknown; + u16_le dropped_frames; + INSERT_PADDING_DSPWORDS(0xE); +}; +ASSERT_DSP_STRUCT(DspStatus, 32); + +/// Final mixed output in PCM16 stereo format, what you hear out of the speakers. +/// When the application writes to this region it has no effect. +struct FinalMixSamples { + s16_le pcm16[2 * AudioCore::samples_per_frame]; +}; +ASSERT_DSP_STRUCT(FinalMixSamples, 640); + +/// DSP writes output of intermediate mixers 1 and 2 here. +/// Writes to this region by the application edits the output of the intermediate mixers. +/// This seems to be intended to allow the application to do custom effects on the ARM11. +/// Values that exceed s16 range will be clipped by the DSP after further processing. 
+struct IntermediateMixSamples { + struct Samples { + s32_le pcm32[4][AudioCore::samples_per_frame]; ///< Little-endian as opposed to DSP middle-endian. + }; + + Samples mix1; + Samples mix2; +}; +ASSERT_DSP_STRUCT(IntermediateMixSamples, 5120); + +/// Compressor table +struct Compressor { + INSERT_PADDING_DSPWORDS(0xD20); ///< TODO +}; + +/// There is no easy way to implement this in a HLE implementation. +struct DspDebug { + INSERT_PADDING_DSPWORDS(0x130); +}; +ASSERT_DSP_STRUCT(DspDebug, 0x260); + +struct SharedMemory { + /// Padding + INSERT_PADDING_DSPWORDS(0x400); + + DspStatus dsp_status; + + DspDebug dsp_debug; + + FinalMixSamples final_samples; + + SourceStatus source_statuses; + + Compressor compressor; + + DspConfiguration dsp_configuration; + + IntermediateMixSamples intermediate_mix_samples; + + SourceConfiguration source_configurations; + + AdpcmCoefficients adpcm_coefficients; + + /// Unknown 10-14 (Surround sound related) + INSERT_PADDING_DSPWORDS(0x16ED); + + u16_le frame_counter; +}; +ASSERT_DSP_STRUCT(SharedMemory, 0x8000); + +#undef INSERT_PADDING_DSPWORDS +#undef ASSERT_DSP_STRUCT + +/// Initialize DSP hardware +void Init(); + +/// Shutdown DSP hardware +void Shutdown(); + +/** + * Perform processing and updates state of current shared memory buffer. + * This function is called every audio tick before triggering the audio interrupt. + * @return Whether an audio interrupt should be triggered this frame. + */ +bool Tick(); + +/// Returns a mutable reference to the current region. Current region is selected based on the frame counter. +SharedMemory& CurrentRegion(); + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp new file mode 100644 index 000000000..6542c760c --- /dev/null +++ b/src/audio_core/hle/pipe.cpp @@ -0,0 +1,55 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <array> +#include <vector> + +#include "audio_core/hle/pipe.h" + +#include "common/common_types.h" +#include "common/logging/log.h" + +namespace DSP { +namespace HLE { + +static size_t pipe2position = 0; + +void ResetPipes() { + pipe2position = 0; +} + +std::vector<u8> PipeRead(u32 pipe_number, u32 length) { + if (pipe_number != 2) { + LOG_WARNING(Audio_DSP, "pipe_number = %u (!= 2), unimplemented", pipe_number); + return {}; // We currently don't handle anything other than the audio pipe. + } + + // Canned DSP responses that games expect. These were taken from HW by 3dmoo team. + // TODO: Our implementation will actually use a slightly different response than this one. + // TODO: Use offsetof on DSP structures instead for a proper response. + static const std::array<u8, 32> canned_response {{ + 0x0F, 0x00, 0xFF, 0xBF, 0x8E, 0x9E, 0x80, 0x86, 0x8E, 0xA7, 0x30, 0x94, 0x00, 0x84, 0x40, 0x85, + 0x8E, 0x94, 0x10, 0x87, 0x10, 0x84, 0x0E, 0xA9, 0x0E, 0xAA, 0xCE, 0xAA, 0x4E, 0xAC, 0x58, 0xAC + }}; + + // TODO: Move this into dsp::DSP service since it happens on the service side. + // Hardware observation: No data is returned if requested length reads beyond the end of the data in-pipe. + if (pipe2position + length > canned_response.size()) { + return {}; + } + + std::vector<u8> ret; + for (size_t i = 0; i < length; i++, pipe2position++) { + ret.emplace_back(canned_response[pipe2position]); + } + + return ret; +} + +void PipeWrite(u32 pipe_number, const std::vector<u8>& buffer) { + // TODO: proper pipe behaviour +} + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h new file mode 100644 index 000000000..ff6536950 --- /dev/null +++ b/src/audio_core/hle/pipe.h @@ -0,0 +1,38 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace DSP { +namespace HLE { + +/// Reset the pipes by setting pipe positions back to the beginning. +void ResetPipes(); + +/** + * Read a DSP pipe. + * Pipe IDs: + * pipe_number = 0: Debug + * pipe_number = 1: P-DMA + * pipe_number = 2: Audio + * pipe_number = 3: Binary + * @param pipe_number The Pipe ID + * @param length How much data to request. + * @return The data read from the pipe. The size of this vector can be less than the length requested. + */ +std::vector<u8> PipeRead(u32 pipe_number, u32 length); + +/** + * Write to a DSP pipe. + * @param pipe_number The Pipe ID + * @param buffer The data to write to the pipe. + */ +void PipeWrite(u32 pipe_number, const std::vector<u8>& buffer); + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h new file mode 100644 index 000000000..cad21a85e --- /dev/null +++ b/src/audio_core/sink.h @@ -0,0 +1,34 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace AudioCore { + +/** + * This class is an interface for an audio sink. An audio sink accepts samples in stereo signed PCM16 format to be output. + * Sinks *do not* handle resampling and expect the correct sample rate. They are dumb outputs. + */ +class Sink { +public: + virtual ~Sink() = default; + + /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) + virtual unsigned GetNativeSampleRate() const = 0; + + /** + * Feed stereo samples to sink. + * @param samples Samples in interleaved stereo PCM16 format. Size of vector must be multiple of two. + */ + virtual void EnqueueSamples(const std::vector<s16>& samples) = 0; + + /// Samples enqueued that have not been played yet. 
+ virtual std::size_t SamplesInQueue() const = 0; +}; + +} // namespace diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt index e7f8a17f9..b9abb818e 100644 --- a/src/citra/CMakeLists.txt +++ b/src/citra/CMakeLists.txt @@ -17,7 +17,7 @@ include_directories(${GLFW_INCLUDE_DIRS}) link_directories(${GLFW_LIBRARY_DIRS}) add_executable(citra ${SRCS} ${HEADERS}) -target_link_libraries(citra core video_core common) +target_link_libraries(citra core video_core audio_core common) target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih glad) if (MSVC) target_link_libraries(citra getopt) diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index bbf6ae001..b3d1205a4 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -79,7 +79,7 @@ if (APPLE) else() add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS}) endif() -target_link_libraries(citra-qt core video_core common qhexedit) +target_link_libraries(citra-qt core video_core audio_core common qhexedit) target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS}) target_link_libraries(citra-qt ${PLATFORM_LIBRARIES}) diff --git a/src/common/bit_field.h b/src/common/bit_field.h index 66689f398..371eb17a1 100644 --- a/src/common/bit_field.h +++ b/src/common/bit_field.h @@ -115,29 +115,24 @@ template<std::size_t position, std::size_t bits, typename T> struct BitField { private: - // This constructor might be considered ambiguous: - // Would it initialize the storage or just the bitfield? - // Hence, delete it. Use the assignment operator to set bitfield values! - BitField(T val) = delete; + // We hide the copy assignment operator here, because the default copy + // assignment would copy the full storage value, rather than just the bits + // relevant to this particular bit field. + // We don't delete it because we want BitField to be trivially copyable. 
+ BitField& operator=(const BitField&) = default; public: + // This constructor and assignment operator might be considered ambiguous: + // Would they initialize the storage or just the bitfield? + // Hence, delete them. Use the Assign method to set bitfield values! + BitField(T val) = delete; + BitField& operator=(T val) = delete; + // Force default constructor to be created // so that we can use this within unions BitField() = default; - // We explicitly delete the copy assigment operator here, because the - // default copy assignment would copy the full storage value, rather than - // just the bits relevant to this particular bit field. - BitField& operator=(const BitField&) = delete; - - FORCE_INLINE BitField& operator=(T val) - { - Assign(val); - return *this; - } - - FORCE_INLINE operator T() const - { + FORCE_INLINE operator T() const { return Value(); } @@ -145,8 +140,7 @@ public: storage = (storage & ~GetMask()) | (((StorageType)value << position) & GetMask()); } - FORCE_INLINE T Value() const - { + FORCE_INLINE T Value() const { if (std::numeric_limits<T>::is_signed) { std::size_t shift = 8 * sizeof(T)-bits; @@ -159,8 +153,7 @@ public: } // TODO: we may want to change this to explicit operator bool() if it's bug-free in VS2015 - FORCE_INLINE bool ToBool() const - { + FORCE_INLINE bool ToBool() const { return Value() != 0; } @@ -176,8 +169,7 @@ private: // Unsigned version of StorageType typedef typename std::make_unsigned<StorageType>::type StorageTypeU; - FORCE_INLINE StorageType GetMask() const - { + FORCE_INLINE StorageType GetMask() const { return (((StorageTypeU)~0) >> (8 * sizeof(T)-bits)) << position; } @@ -189,6 +181,10 @@ private: static_assert(position < 8 * sizeof(T), "Invalid position"); static_assert(bits <= 8 * sizeof(T), "Invalid number of bits"); static_assert(bits > 0, "Invalid number of bits"); - static_assert(std::is_standard_layout<T>::value, "Invalid base type"); + static_assert(std::is_pod<T>::value, "Invalid base type"); }; #pragma 
pack() + +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) +static_assert(std::is_trivially_copyable<BitField<0, 1, u32>>::value, "BitField must be trivially copyable"); +#endif diff --git a/src/common/emu_window.cpp b/src/common/emu_window.cpp index b69b05cb9..b2807354a 100644 --- a/src/common/emu_window.cpp +++ b/src/common/emu_window.cpp @@ -55,14 +55,14 @@ void EmuWindow::TouchPressed(unsigned framebuffer_x, unsigned framebuffer_y) { (framebuffer_layout.bottom_screen.bottom - framebuffer_layout.bottom_screen.top); touch_pressed = true; - pad_state.touch = 1; + pad_state.touch.Assign(1); } void EmuWindow::TouchReleased() { touch_pressed = false; touch_x = 0; touch_y = 0; - pad_state.touch = 0; + pad_state.touch.Assign(0); } void EmuWindow::TouchMoved(unsigned framebuffer_x, unsigned framebuffer_y) { diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index d186ba8f8..58819012d 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -58,6 +58,8 @@ namespace Log { CLS(Render) \ SUB(Render, Software) \ SUB(Render, OpenGL) \ + CLS(Audio) \ + SUB(Audio, DSP) \ CLS(Loader) // GetClassName is a macro defined by Windows.h, grrr... 
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h index c1f4d08e4..795d42ebd 100644 --- a/src/common/logging/backend.h +++ b/src/common/logging/backend.h @@ -27,25 +27,9 @@ struct Entry { std::string message; Entry() = default; + Entry(Entry&& o) = default; - // TODO(yuriks) Use defaulted move constructors once MSVC supports them -#define MOVE(member) member(std::move(o.member)) - Entry(Entry&& o) - : MOVE(timestamp), MOVE(log_class), MOVE(log_level), - MOVE(location), MOVE(message) - {} -#undef MOVE - - Entry& operator=(const Entry&& o) { -#define MOVE(member) member = std::move(o.member) - MOVE(timestamp); - MOVE(log_class); - MOVE(log_level); - MOVE(location); - MOVE(message); -#undef MOVE - return *this; - } + Entry& operator=(Entry&& o) = default; }; /** diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 2d9323a7b..ec7bb00b8 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -73,6 +73,8 @@ enum class Class : ClassType { Render, ///< Emulator video output and hardware acceleration Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend + Audio, ///< Emulator audio output + Audio_DSP, ///< The HLE implementation of the DSP Loader, ///< ROM loader Count ///< Total number of logging classes diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp index 0cfb43fc7..862643448 100644 --- a/src/core/hle/kernel/memory.cpp +++ b/src/core/hle/kernel/memory.cpp @@ -7,6 +7,8 @@ #include <utility> #include <vector> +#include "audio_core/audio_core.h" + #include "common/common_types.h" #include "common/logging/log.h" @@ -107,7 +109,6 @@ struct MemoryArea { static MemoryArea memory_areas[] = { {SHARED_MEMORY_VADDR, SHARED_MEMORY_SIZE, "Shared Memory"}, // Shared memory {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM) - {DSP_RAM_VADDR, DSP_RAM_SIZE, "DSP RAM"}, // DSP memory {TLS_AREA_VADDR, TLS_AREA_SIZE, "TLS Area"}, // TLS memory }; @@ -133,6 +134,8 @@ void 
InitLegacyAddressSpace(Kernel::VMManager& address_space) { auto shared_page_vma = address_space.MapBackingMemory(SHARED_PAGE_VADDR, (u8*)&SharedPage::shared_page, SHARED_PAGE_SIZE, MemoryState::Shared).MoveFrom(); address_space.Reprotect(shared_page_vma, VMAPermission::Read); + + AudioCore::AddAddressSpace(address_space); } } // namespace diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index d148efde2..16eb972fb 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -35,7 +35,7 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) { process->codeset = std::move(code_set); process->flags.raw = 0; - process->flags.memory_region = MemoryRegion::APPLICATION; + process->flags.memory_region.Assign(MemoryRegion::APPLICATION); Memory::InitLegacyAddressSpace(process->vm_manager); return process; diff --git a/src/core/hle/result.h b/src/core/hle/result.h index ea3abb5f6..0fce5988b 100644 --- a/src/core/hle/result.h +++ b/src/core/hle/result.h @@ -193,10 +193,10 @@ union ResultCode { explicit ResultCode(u32 raw) : raw(raw) {} ResultCode(ErrorDescription description_, ErrorModule module_, ErrorSummary summary_, ErrorLevel level_) : raw(0) { - description = description_; - module = module_; - summary = summary_; - level = level_; + description.Assign(description_); + module.Assign(module_); + summary.Assign(summary_); + level.Assign(level_); } ResultCode& operator=(const ResultCode& o) { raw = o.raw; return *this; } diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp index 633fe19eb..7556aa6a5 100644 --- a/src/core/hle/service/cfg/cfg.cpp +++ b/src/core/hle/service/cfg/cfg.cpp @@ -293,8 +293,8 @@ ResultCode DeleteConfigNANDSaveFile() { ResultCode UpdateConfigNANDSavegame() { FileSys::Mode mode = {}; - mode.write_flag = 1; - mode.create_flag = 1; + mode.write_flag.Assign(1); + mode.create_flag.Assign(1); FileSys::Path path("config"); @@ -405,7 +405,7 @@ void Init() { 
FileSys::Path config_path("config"); FileSys::Mode open_mode = {}; - open_mode.read_flag = 1; + open_mode.read_flag.Assign(1); auto config_result = Service::FS::OpenFileFromArchive(*archive_result, config_path, open_mode); diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp index f9f931f6d..15d3274ec 100644 --- a/src/core/hle/service/dsp_dsp.cpp +++ b/src/core/hle/service/dsp_dsp.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "audio_core/hle/pipe.h" + #include "common/logging/log.h" #include "core/hle/kernel/event.h" @@ -14,17 +16,30 @@ namespace DSP_DSP { static u32 read_pipe_count; static Kernel::SharedPtr<Kernel::Event> semaphore_event; -static Kernel::SharedPtr<Kernel::Event> interrupt_event; -void SignalInterrupt() { - // TODO(bunnei): This is just a stub, it does not do anything other than signal to the emulated - // application that a DSP interrupt occurred, without specifying which one. Since we do not - // emulate the DSP yet (and how it works is largely unknown), this is a work around to get games - // that check the DSP interrupt signal event to run. We should figure out the different types of - // DSP interrupts, and trigger them at the appropriate times. +struct PairHash { + template <typename T, typename U> + std::size_t operator()(const std::pair<T, U> &x) const { + // TODO(yuriks): Replace with better hash combining function. + return std::hash<T>()(x.first) ^ std::hash<U>()(x.second); + } +}; + +/// Map of (audio interrupt number, channel number) to Kernel::Events. See: RegisterInterruptEvents +static std::unordered_map<std::pair<u32, u32>, Kernel::SharedPtr<Kernel::Event>, PairHash> interrupt_events; + +// DSP Interrupts: +// Interrupt #2 occurs every frame tick. Userland programs normally have a thread that's waiting +// for an interrupt event. 
Immediately after this interrupt event, userland normally updates the +// state in the next region and increments the relevant frame counter by two. +void SignalAllInterrupts() { + // HACK: The other interrupts have currently unknown purpose, we trigger them each tick in any case. + for (auto& interrupt_event : interrupt_events) + interrupt_event.second->Signal(); +} - if (interrupt_event != 0) - interrupt_event->Signal(); +void SignalInterrupt(u32 interrupt, u32 channel) { + interrupt_events[std::make_pair(interrupt, channel)]->Signal(); } /** @@ -43,7 +58,7 @@ static void ConvertProcessAddressFromDspDram(Service::Interface* self) { cmd_buff[1] = 0; // No error cmd_buff[2] = (addr << 1) + (Memory::DSP_RAM_VADDR + 0x40000); - LOG_WARNING(Service_DSP, "(STUBBED) called with address 0x%08X", addr); + LOG_TRACE(Service_DSP, "addr=0x%08X", addr); } /** @@ -121,8 +136,8 @@ static void FlushDataCache(Service::Interface* self) { /** * DSP_DSP::RegisterInterruptEvents service function * Inputs: - * 1 : Parameter 0 (purpose unknown) - * 2 : Parameter 1 (purpose unknown) + * 1 : Interrupt Number + * 2 : Channel Number * 4 : Interrupt event handle * Outputs: * 1 : Result of function, 0 on success, otherwise error code @@ -130,22 +145,24 @@ static void FlushDataCache(Service::Interface* self) { static void RegisterInterruptEvents(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u32 param0 = cmd_buff[1]; - u32 param1 = cmd_buff[2]; + u32 interrupt = cmd_buff[1]; + u32 channel = cmd_buff[2]; u32 event_handle = cmd_buff[4]; - auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); - if (evt != nullptr) { - interrupt_event = evt; - cmd_buff[1] = 0; // No error + if (event_handle) { + auto evt = Kernel::g_handle_table.Get<Kernel::Event>(cmd_buff[4]); + if (evt) { + interrupt_events[std::make_pair(interrupt, channel)] = evt; + cmd_buff[1] = RESULT_SUCCESS.raw; + LOG_WARNING(Service_DSP, "Registered interrupt=%u, channel=%u, event_handle=0x%08X", 
interrupt, channel, event_handle); + } else { + cmd_buff[1] = -1; + LOG_ERROR(Service_DSP, "Invalid event handle! interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); + } } else { - LOG_ERROR(Service_DSP, "called with invalid handle=%08X", cmd_buff[4]); - - // TODO(yuriks): An error should be returned from SendSyncRequest, not in the cmdbuf - cmd_buff[1] = -1; + interrupt_events.erase(std::make_pair(interrupt, channel)); + LOG_WARNING(Service_DSP, "Unregistered interrupt=%u, channel=%u, event_handle=0x%08X", interrupt, channel, event_handle); } - - LOG_WARNING(Service_DSP, "(STUBBED) called param0=%u, param1=%u, event_handle=0x%08X", param0, param1, event_handle); } /** @@ -158,8 +175,6 @@ static void RegisterInterruptEvents(Service::Interface* self) { static void SetSemaphore(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - SignalInterrupt(); - cmd_buff[1] = 0; // No error LOG_WARNING(Service_DSP, "(STUBBED) called"); @@ -168,9 +183,9 @@ static void SetSemaphore(Service::Interface* self) { /** * DSP_DSP::WriteProcessPipe service function * Inputs: - * 1 : Number + * 1 : Channel * 2 : Size - * 3 : (size <<14) | 0x402 + * 3 : (size << 14) | 0x402 * 4 : Buffer * Outputs: * 0 : Return header @@ -179,21 +194,42 @@ static void SetSemaphore(Service::Interface* self) { static void WriteProcessPipe(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u32 number = cmd_buff[1]; + u32 channel = cmd_buff[1]; u32 size = cmd_buff[2]; - u32 new_size = cmd_buff[3]; u32 buffer = cmd_buff[4]; + if (IPC::StaticBufferDesc(size, 1) != cmd_buff[3]) { + LOG_ERROR(Service_DSP, "IPC static buffer descriptor failed validation (0x%X). 
channel=%u, size=0x%X, buffer=0x%08X", cmd_buff[3], channel, size, buffer); + cmd_buff[1] = -1; // TODO + return; + } + + if (!Memory::GetPointer(buffer)) { + LOG_ERROR(Service_DSP, "Invalid Buffer: channel=%u, size=0x%X, buffer=0x%08X", channel, size, buffer); + cmd_buff[1] = -1; // TODO + return; + } + + std::vector<u8> message(size); + + for (size_t i = 0; i < size; i++) { + message[i] = Memory::Read8(buffer + i); + } + + DSP::HLE::PipeWrite(channel, message); + cmd_buff[1] = RESULT_SUCCESS.raw; // No error - LOG_WARNING(Service_DSP, "(STUBBED) called number=%u, size=0x%X, new_size=0x%X, buffer=0x%08X", - number, size, new_size, buffer); + LOG_TRACE(Service_DSP, "channel=%u, size=0x%X, buffer=0x%08X", channel, size, buffer); } /** * DSP_DSP::ReadPipeIfPossible service function + * A pipe is a means of communication between the ARM11 and DSP that occurs on + * hardware by writing to/reading from the DSP registers at 0x10203000. + * Pipes are used for initialisation. See also DSP::HLE::PipeRead. * Inputs: - * 1 : Unknown + * 1 : Pipe Number * 2 : Unknown * 3 : Size in bytes of read (observed only lower half word used) * 0x41 : Virtual address to read from DSP pipe to in memory @@ -204,35 +240,25 @@ static void WriteProcessPipe(Service::Interface* self) { static void ReadPipeIfPossible(Service::Interface* self) { u32* cmd_buff = Kernel::GetCommandBuffer(); - u32 unk1 = cmd_buff[1]; + u32 pipe = cmd_buff[1]; u32 unk2 = cmd_buff[2]; u32 size = cmd_buff[3] & 0xFFFF;// Lower 16 bits are size VAddr addr = cmd_buff[0x41]; - // Canned DSP responses that games expect. These were taken from HW by 3dmoo team. 
- // TODO: Remove this hack :) - static const std::array<u16, 16> canned_read_pipe = {{ - 0x000F, 0xBFFF, 0x9E8E, 0x8680, 0xA78E, 0x9430, 0x8400, 0x8540, - 0x948E, 0x8710, 0x8410, 0xA90E, 0xAA0E, 0xAACE, 0xAC4E, 0xAC58 - }}; + if (!Memory::GetPointer(addr)) { + LOG_ERROR(Service_DSP, "Invalid addr: pipe=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", pipe, unk2, size, addr); + cmd_buff[1] = -1; // TODO + return; + } - u32 initial_size = read_pipe_count; + std::vector<u8> response = DSP::HLE::PipeRead(pipe, size); - for (unsigned offset = 0; offset < size; offset += sizeof(u16)) { - if (read_pipe_count < canned_read_pipe.size()) { - Memory::Write16(addr + offset, canned_read_pipe[read_pipe_count]); - read_pipe_count++; - } else { - LOG_ERROR(Service_DSP, "canned read pipe log exceeded!"); - break; - } - } + Memory::WriteBlock(addr, response.data(), response.size()); cmd_buff[1] = 0; // No error - cmd_buff[2] = (read_pipe_count - initial_size) * sizeof(u16); + cmd_buff[2] = (u32)response.size(); - LOG_WARNING(Service_DSP, "(STUBBED) called unk1=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", - unk1, unk2, size, addr); + LOG_TRACE(Service_DSP, "pipe=0x%08X, unk2=0x%08X, size=0x%X, buffer=0x%08X", pipe, unk2, size, addr); } /** @@ -311,7 +337,6 @@ const Interface::FunctionInfo FunctionTable[] = { Interface::Interface() { semaphore_event = Kernel::Event::Create(RESETTYPE_ONESHOT, "DSP_DSP::semaphore_event"); - interrupt_event = nullptr; read_pipe_count = 0; Register(FunctionTable); @@ -319,7 +344,7 @@ Interface::Interface() { Interface::~Interface() { semaphore_event = nullptr; - interrupt_event = nullptr; + interrupt_events.clear(); } } // namespace diff --git a/src/core/hle/service/dsp_dsp.h b/src/core/hle/service/dsp_dsp.h index b6f611db5..32b89e9bb 100644 --- a/src/core/hle/service/dsp_dsp.h +++ b/src/core/hle/service/dsp_dsp.h @@ -23,7 +23,15 @@ public: } }; -/// Signals that a DSP interrupt has occurred to userland code -void SignalInterrupt(); +/// Signal all 
audio related interrupts. +void SignalAllInterrupts(); + +/** + * Signal a specific audio related interrupt based on interrupt id and channel id. + * @param interrupt_id The interrupt id + * @param channel_id The channel id + * The significance of various values of interrupt_id and channel_id is not yet known. + */ +void SignalInterrupt(u32 interrupt_id, u32 channel_id); } // namespace diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index 98b11c798..5838b6d71 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp @@ -347,7 +347,7 @@ void SignalInterrupt(InterruptId interrupt_id) { FrameBufferUpdate* info = GetFrameBufferInfo(thread_id, screen_id); if (info->is_dirty) { SetBufferSwap(screen_id, info->framebuffer_info[info->index]); - info->is_dirty = false; + info->is_dirty.Assign(false); } } } @@ -499,7 +499,7 @@ static void SetLcdForceBlack(Service::Interface* self) { // Since data is already zeroed, there is no need to explicitly set // the color to black (all zero). 
- data.is_enabled = enable_black; + data.is_enabled.Assign(enable_black); LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_top), data.raw); // Top LCD LCD::Write(HW::VADDR_LCD + 4 * LCD_REG_INDEX(color_fill_bottom), data.raw); // Bottom LCD @@ -521,7 +521,7 @@ static void TriggerCmdReqQueue(Service::Interface* self) { ExecuteCommand(command_buffer->commands[i], thread_id); // Indicates that command has completed - command_buffer->number_commands = command_buffer->number_commands - 1; + command_buffer->number_commands.Assign(command_buffer->number_commands - 1); } } diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp index 0bed0ce36..11d7e69a1 100644 --- a/src/core/hle/service/hid/hid.cpp +++ b/src/core/hle/service/hid/hid.cpp @@ -105,7 +105,7 @@ void Update() { bool pressed = false; std::tie(touch_entry->x, touch_entry->y, pressed) = VideoCore::g_emu_window->GetTouchState(); - touch_entry->valid = pressed ? 1 : 0; + touch_entry->valid.Assign(pressed ? 
1 : 0); // TODO(bunnei): We're not doing anything with offset 0xA8 + 0x18 of HID SharedMemory, which // supposedly is "Touch-screen entry, which contains the raw coordinate data prior to being diff --git a/src/core/hle/service/ptm/ptm.cpp b/src/core/hle/service/ptm/ptm.cpp index 22c1093ff..6bdee4d9e 100644 --- a/src/core/hle/service/ptm/ptm.cpp +++ b/src/core/hle/service/ptm/ptm.cpp @@ -110,8 +110,8 @@ void Init() { FileSys::Path gamecoin_path("gamecoin.dat"); FileSys::Mode open_mode = {}; - open_mode.write_flag = 1; - open_mode.create_flag = 1; + open_mode.write_flag.Assign(1); + open_mode.create_flag.Assign(1); // Open the file and write the default gamecoin information auto gamecoin_result = Service::FS::OpenFileFromArchive(*archive_result, gamecoin_path, open_mode); if (gamecoin_result.Succeeded()) { diff --git a/src/core/hle/service/soc_u.cpp b/src/core/hle/service/soc_u.cpp index 822b093f4..e603bf794 100644 --- a/src/core/hle/service/soc_u.cpp +++ b/src/core/hle/service/soc_u.cpp @@ -178,17 +178,17 @@ struct CTRPollFD { static Events TranslateTo3DS(u32 input_event) { Events ev = {}; if (input_event & POLLIN) - ev.pollin = 1; + ev.pollin.Assign(1); if (input_event & POLLPRI) - ev.pollpri = 1; + ev.pollpri.Assign(1); if (input_event & POLLHUP) - ev.pollhup = 1; + ev.pollhup.Assign(1); if (input_event & POLLERR) - ev.pollerr = 1; + ev.pollerr.Assign(1); if (input_event & POLLOUT) - ev.pollout = 1; + ev.pollout.Assign(1); if (input_event & POLLNVAL) - ev.pollnval = 1; + ev.pollnval.Assign(1); return ev; } diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 4bd3a632d..5312baa83 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -17,7 +17,6 @@ #include "core/core_timing.h" #include "core/hle/service/gsp_gpu.h" -#include "core/hle/service/dsp_dsp.h" #include "core/hle/service/hid/hid.h" #include "core/hw/hw.h" @@ -146,8 +145,8 @@ inline void Write(u32 addr, const T data) { // Reset "trigger" flag and set the "finish" flag // NOTE: This was 
confirmed to happen on hardware even if "address_start" is zero. - config.trigger = 0; - config.finished = 1; + config.trigger.Assign(0); + config.finished.Assign(1); } break; } @@ -414,11 +413,6 @@ static void VBlankCallback(u64 userdata, int cycles_late) { GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); - // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but - // until we can emulate DSP interrupts, this is probably the only reasonable place to do - // this. Certain games expect this to be periodically signaled. - DSP_DSP::SignalInterrupt(); - // Check for user input updates Service::HID::Update(); @@ -444,16 +438,16 @@ void Init() { framebuffer_sub.address_left1 = 0x1848F000; framebuffer_sub.address_left2 = 0x184C7800; - framebuffer_top.width = 240; - framebuffer_top.height = 400; + framebuffer_top.width.Assign(240); + framebuffer_top.height.Assign(400); framebuffer_top.stride = 3 * 240; - framebuffer_top.color_format = Regs::PixelFormat::RGB8; + framebuffer_top.color_format.Assign(Regs::PixelFormat::RGB8); framebuffer_top.active_fb = 0; - framebuffer_sub.width = 240; - framebuffer_sub.height = 320; + framebuffer_sub.width.Assign(240); + framebuffer_sub.height.Assign(320); framebuffer_sub.stride = 3 * 240; - framebuffer_sub.color_format = Regs::PixelFormat::RGB8; + framebuffer_sub.color_format.Assign(Regs::PixelFormat::RGB8); framebuffer_sub.active_fb = 0; last_skip_frame = false; diff --git a/src/core/system.cpp b/src/core/system.cpp index 7e9c56538..b62ebf69e 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -2,9 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "audio_core/audio_core.h" + #include "core/core.h" #include "core/core_timing.h" #include "core/system.h" +#include "core/gdbstub/gdbstub.h" #include "core/hw/hw.h" #include "core/hle/hle.h" #include "core/hle/kernel/kernel.h" @@ -12,8 +15,6 @@ #include "video_core/video_core.h" -#include "core/gdbstub/gdbstub.h" - namespace System { void Init(EmuWindow* emu_window) { @@ -24,11 +25,13 @@ void Init(EmuWindow* emu_window) { Kernel::Init(); HLE::Init(); VideoCore::Init(emu_window); + AudioCore::Init(); GDBStub::Init(); } void Shutdown() { GDBStub::Shutdown(); + AudioCore::Shutdown(); VideoCore::Shutdown(); HLE::Shutdown(); Kernel::Shutdown(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c3d7294d5..4b5d298f3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -33,6 +33,7 @@ set(HEADERS command_processor.h gpu_debugger.h pica.h + pica_types.h primitive_assembly.h rasterizer.h rasterizer_interface.h diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 5d609da06..a385589d2 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -59,15 +59,17 @@ static void InitScreenCoordinates(OutputVertex& vtx) } viewport; const auto& regs = g_state.regs; - viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); - viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); + viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x); + viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x)); viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y)); - viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); - viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); + viewport.zscale = float24::FromRaw(regs.viewport_depth_range); + viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); 
float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; + vtx.view *= inv_w; + vtx.quat *= inv_w; vtx.tc0 *= inv_w; vtx.tc1 *= inv_w; vtx.tc2 *= inv_w; diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 59c75042c..73fdfbe9c 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); + attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF); LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), @@ -157,15 +157,25 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // TODO: What happens if a loader overwrites a previous one's data? 
for (unsigned component = 0; component < loader_config.component_count; ++component) { - if (component >= 12) + if (component >= 12) { LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); + continue; + } + u32 attribute_index = loader_config.GetComponent(component); - vertex_attribute_sources[attribute_index] = load_address; - vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); - vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); - vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); - vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); - load_address += attribute_config.GetStride(attribute_index); + if (attribute_index < 12) { + vertex_attribute_sources[attribute_index] = load_address; + vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); + vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); + vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); + vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index); + load_address += attribute_config.GetStride(attribute_index); + } else if (attribute_index < 16) { + // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively + load_address += (attribute_index - 11) * 4; + } else { + UNREACHABLE(); // This is truly unreachable due to the number of bits for each component + } } } @@ -418,10 +428,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i])); } else { // TODO: Untested - uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8); - uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | 
((uniform_write_buffer[1] >> 16) & 0xFFFF)); - uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); - uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF); + uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8); + uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF)); + uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF)); + uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF); } LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index, @@ -429,7 +439,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { uniform.w.ToFloat32()); // TODO: Verify that this actually modifies the register! - uniform_setup.index = uniform_setup.index + 1; + uniform_setup.index.Assign(uniform_setup.index + 1); } break; } @@ -464,6 +474,24 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + + ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!"); + + g_state.lighting.luts[lut_config.type][lut_config.index].raw = value; + lut_config.index.Assign(lut_config.index + 1); + break; + } + default: break; } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 4f66dbd65..6e6fd7335 100644 --- 
a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -201,11 +201,11 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c if (it == output_info_table.end()) { output_info_table.emplace_back(); - output_info_table.back().type = type; - output_info_table.back().component_mask = component_mask; - output_info_table.back().id = i; + output_info_table.back().type.Assign(type); + output_info_table.back().component_mask.Assign(component_mask); + output_info_table.back().id.Assign(i); } else { - it->component_mask = it->component_mask | component_mask; + it->component_mask.Assign(it->component_mask | component_mask); } } catch (const std::out_of_range& ) { DEBUG_ASSERT_MSG(false, "Unknown output attribute mapping"); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 2f1b2dec4..9077b1725 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -16,6 +16,8 @@ #include "common/vector_math.h" #include "common/logging/log.h" +#include "pica_types.h" + namespace Pica { // Returns index corresponding to the Regs member labeled by field_name @@ -239,7 +241,8 @@ struct Regs { TextureConfig texture0; INSERT_PADDING_WORDS(0x8); BitField<0, 4, TextureFormat> texture0_format; - INSERT_PADDING_WORDS(0x2); + BitField<0, 1, u32> fragment_lighting_enable; + INSERT_PADDING_WORDS(0x1); TextureConfig texture1; BitField<0, 4, TextureFormat> texture1_format; INSERT_PADDING_WORDS(0x2); @@ -641,7 +644,268 @@ struct Regs { } } - INSERT_PADDING_WORDS(0xe0); + INSERT_PADDING_WORDS(0x20); + + enum class LightingSampler { + Distribution0 = 0, + Distribution1 = 1, + Fresnel = 3, + ReflectBlue = 4, + ReflectGreen = 5, + ReflectRed = 6, + SpotlightAttenuation = 8, + DistanceAttenuation = 16, + }; + + /** + * Pica fragment lighting supports using different LUTs for each lighting component: + * Reflectance R, G, and B channels, distribution function for specular components 0 and 1, + * fresnel factor, and 
spotlight attenuation. Furthermore, which LUTs are used for each channel + * (or whether a channel is enabled at all) is specified by various pre-defined lighting + * configurations. With configurations that require more LUTs, more cycles are required on HW to + * perform lighting computations. + */ + enum class LightingConfig { + Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight + Config1 = 1, ///< Reflect Red, Fresnel, Spotlight + Config2 = 2, ///< Reflect Red, Distribution 0/1 + Config3 = 3, ///< Distribution 0/1, Fresnel + Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight + Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight + Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight + Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight + ///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration + }; + + /// Selects which lighting components are affected by fresnel + enum class LightingFresnelSelector { + None = 0, ///< Fresnel is disabled + PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel + SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel + Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel + }; + + /// Factor used to scale the output of a lighting LUT + enum class LightingScale { + Scale1 = 0, ///< Scale is 1x + Scale2 = 1, ///< Scale is 2x + Scale4 = 2, ///< Scale is 4x + Scale8 = 3, ///< Scale is 8x + Scale1_4 = 6, ///< Scale is 0.25x + Scale1_2 = 7, ///< Scale is 0.5x + }; + + enum class LightingLutInput { + NH = 0, // Cosine of the angle between the normal and half-angle vectors + VH = 1, // Cosine of the angle between the view and half-angle vectors + NV = 2, // Cosine of the angle between the normal and the view vector + LN = 3, // Cosine of the angle between the light and the normal vectors + }; + + enum class 
LightingBumpMode : u32 { + None = 0, + NormalMap = 1, + TangentMap = 2, + }; + + union LightColor { + BitField< 0, 10, u32> b; + BitField<10, 10, u32> g; + BitField<20, 10, u32> r; + + Math::Vec3f ToVec3f() const { + // These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component + return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f); + } + }; + + /// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration + static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) { + switch (sampler) { + case LightingSampler::Distribution0: + return (config != LightingConfig::Config1); + + case LightingSampler::Distribution1: + return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + + case LightingSampler::Fresnel: + return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); + + case LightingSampler::ReflectRed: + return (config != LightingConfig::Config3); + + case LightingSampler::ReflectGreen: + case LightingSampler::ReflectBlue: + return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); + } + return false; + } + + struct { + struct LightSrc { + LightColor specular_0; // material.specular_0 * light.specular_0 + LightColor specular_1; // material.specular_1 * light.specular_1 + LightColor diffuse; // material.diffuse * light.diffuse + LightColor ambient; // material.ambient * light.ambient + + struct { + // Encoded as 16-bit floating point + union { + BitField< 0, 16, u32> x; + BitField<16, 16, u32> y; + }; + union { + BitField< 0, 16, u32> z; + }; + + INSERT_PADDING_WORDS(0x3); + + union { + BitField<0, 1, u32> directional; + BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 + }; + }; + + BitField<0, 20, u32> dist_atten_bias; + 
BitField<0, 20, u32> dist_atten_scale; + + INSERT_PADDING_WORDS(0x4); + }; + static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words"); + + LightSrc light[8]; + LightColor global_ambient; // Emission + (material.ambient * lighting.ambient) + INSERT_PADDING_WORDS(0x1); + BitField<0, 3, u32> num_lights; // Number of enabled lights - 1 + + union { + BitField< 2, 2, LightingFresnelSelector> fresnel_selector; + BitField< 4, 4, LightingConfig> config; + BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2 + BitField<27, 1, u32> clamp_highlights; + BitField<28, 2, LightingBumpMode> bump_mode; + BitField<30, 1, u32> disable_bump_renorm; + }; + + union { + BitField<16, 1, u32> disable_lut_d0; + BitField<17, 1, u32> disable_lut_d1; + BitField<19, 1, u32> disable_lut_fr; + BitField<20, 1, u32> disable_lut_rr; + BitField<21, 1, u32> disable_lut_rg; + BitField<22, 1, u32> disable_lut_rb; + + // Each bit specifies whether distance attenuation should be applied for the + // corresponding light + + BitField<24, 1, u32> disable_dist_atten_light_0; + BitField<25, 1, u32> disable_dist_atten_light_1; + BitField<26, 1, u32> disable_dist_atten_light_2; + BitField<27, 1, u32> disable_dist_atten_light_3; + BitField<28, 1, u32> disable_dist_atten_light_4; + BitField<29, 1, u32> disable_dist_atten_light_5; + BitField<30, 1, u32> disable_dist_atten_light_6; + BitField<31, 1, u32> disable_dist_atten_light_7; + }; + + bool IsDistAttenDisabled(unsigned index) const { + const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1, + disable_dist_atten_light_2, disable_dist_atten_light_3, + disable_dist_atten_light_4, disable_dist_atten_light_5, + disable_dist_atten_light_6, disable_dist_atten_light_7 }; + return disable[index] != 0; + } + + union { + BitField<0, 8, u32> index; ///< Index at which to set data in the LUT + BitField<8, 5, u32> type; ///< Type of LUT for which to set data + } lut_config; + + 
BitField<0, 1, u32> disable; + INSERT_PADDING_WORDS(0x1); + + // When data is written to any of these registers, it gets written to the lookup table of + // the selected type at the selected index, specified above in the `lut_config` register. + // With each write, `lut_config.index` is incremented. It does not matter which of these + // registers is written to, the behavior will be the same. + u32 lut_data[8]; + + // These are used to specify if absolute (abs) value should be used for each LUT index. When + // abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in + // the range of (0.0, 1.0). + union { + BitField< 1, 1, u32> disable_d0; + BitField< 5, 1, u32> disable_d1; + BitField< 9, 1, u32> disable_sp; + BitField<13, 1, u32> disable_fr; + BitField<17, 1, u32> disable_rb; + BitField<21, 1, u32> disable_rg; + BitField<25, 1, u32> disable_rr; + } abs_lut_input; + + union { + BitField< 0, 3, LightingLutInput> d0; + BitField< 4, 3, LightingLutInput> d1; + BitField< 8, 3, LightingLutInput> sp; + BitField<12, 3, LightingLutInput> fr; + BitField<16, 3, LightingLutInput> rb; + BitField<20, 3, LightingLutInput> rg; + BitField<24, 3, LightingLutInput> rr; + } lut_input; + + union { + BitField< 0, 3, LightingScale> d0; + BitField< 4, 3, LightingScale> d1; + BitField< 8, 3, LightingScale> sp; + BitField<12, 3, LightingScale> fr; + BitField<16, 3, LightingScale> rb; + BitField<20, 3, LightingScale> rg; + BitField<24, 3, LightingScale> rr; + + static float GetScale(LightingScale scale) { + switch (scale) { + case LightingScale::Scale1: + return 1.0f; + case LightingScale::Scale2: + return 2.0f; + case LightingScale::Scale4: + return 4.0f; + case LightingScale::Scale8: + return 8.0f; + case LightingScale::Scale1_4: + return 0.25f; + case LightingScale::Scale1_2: + return 0.5f; + } + return 0.0f; + } + } lut_scale; + + INSERT_PADDING_WORDS(0x6); + + union { + // There are 8 light enable "slots", corresponding to the total number of lights + 
// supported by Pica. For N enabled lights (specified by register 0x1c2, or 'num_lights' + // above), the first N slots below will be set to integers within the range of 0-7, + // corresponding to the actual light that is enabled for each slot. + + BitField< 0, 3, u32> slot_0; + BitField< 4, 3, u32> slot_1; + BitField< 8, 3, u32> slot_2; + BitField<12, 3, u32> slot_3; + BitField<16, 3, u32> slot_4; + BitField<20, 3, u32> slot_5; + BitField<24, 3, u32> slot_6; + BitField<28, 3, u32> slot_7; + + unsigned GetNum(unsigned index) const { + const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 }; + return enable_slots[index]; + } + } light_enable; + } lighting; + + INSERT_PADDING_WORDS(0x26); enum class VertexAttributeFormat : u64 { BYTE = 0, @@ -990,6 +1254,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); +ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f); ASSERT_REG_POSITION(texture1, 0x91); ASSERT_REG_POSITION(texture1_format, 0x96); ASSERT_REG_POSITION(texture2, 0x99); @@ -1004,6 +1269,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); +ASSERT_REG_POSITION(lighting, 0x140); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); ASSERT_REG_POSITION(num_vertices, 0x228); @@ -1026,118 +1292,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); -struct float24 { - static float24 FromFloat32(float val) { - float24 ret; - ret.value = val; - return ret; - } - - // 16 bit mantissa, 7 bit exponent, 1 bit sign - // 
TODO: No idea if this works as intended - static float24 FromRawFloat24(u32 hex) { - float24 ret; - if ((hex & 0xFFFFFF) == 0) { - ret.value = 0; - } else { - u32 mantissa = hex & 0xFFFF; - u32 exponent = (hex >> 16) & 0x7F; - u32 sign = hex >> 23; - ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); - if (sign) - ret.value = -ret.value; - } - return ret; - } - - static float24 Zero() { - return FromFloat32(0.f); - } - - // Not recommended for anything but logging - float ToFloat32() const { - return value; - } - - float24 operator * (const float24& flt) const { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - return Zero(); - return float24::FromFloat32(ToFloat32() * flt.ToFloat32()); - } - - float24 operator / (const float24& flt) const { - return float24::FromFloat32(ToFloat32() / flt.ToFloat32()); - } - - float24 operator + (const float24& flt) const { - return float24::FromFloat32(ToFloat32() + flt.ToFloat32()); - } - - float24 operator - (const float24& flt) const { - return float24::FromFloat32(ToFloat32() - flt.ToFloat32()); - } - - float24& operator *= (const float24& flt) { - if ((this->value == 0.f && !std::isnan(flt.value)) || - (flt.value == 0.f && !std::isnan(this->value))) - // PICA gives 0 instead of NaN when multiplying by inf - *this = Zero(); - else value *= flt.ToFloat32(); - return *this; - } - - float24& operator /= (const float24& flt) { - value /= flt.ToFloat32(); - return *this; - } - - float24& operator += (const float24& flt) { - value += flt.ToFloat32(); - return *this; - } - - float24& operator -= (const float24& flt) { - value -= flt.ToFloat32(); - return *this; - } - - float24 operator - () const { - return float24::FromFloat32(-ToFloat32()); - } - - bool operator < (const float24& flt) const { - return ToFloat32() < flt.ToFloat32(); - } - - bool operator > (const float24& flt) 
const { - return ToFloat32() > flt.ToFloat32(); - } - - bool operator >= (const float24& flt) const { - return ToFloat32() >= flt.ToFloat32(); - } - - bool operator <= (const float24& flt) const { - return ToFloat32() <= flt.ToFloat32(); - } - - bool operator == (const float24& flt) const { - return ToFloat32() == flt.ToFloat32(); - } - - bool operator != (const float24& flt) const { - return ToFloat32() != flt.ToFloat32(); - } - -private: - // Stored as a regular float, merely for convenience - // TODO: Perform proper arithmetic on this! - float value; -}; -static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float"); - /// Struct used to describe current Pica state struct State { /// Pica registers @@ -1163,6 +1317,25 @@ struct State { ShaderSetup vs; ShaderSetup gs; + struct { + union LutEntry { + // Used for raw access + u32 raw; + + // LUT value, encoded as 12-bit fixed point, with 12 fraction bits + BitField< 0, 12, u32> value; + + // Used by HW for efficient interpolation, Citra does not use these + BitField<12, 12, u32> difference; + + float ToFloat() { + return static_cast<float>(value) / 4095.f; + } + }; + + std::array<std::array<LutEntry, 256>, 24> luts; + } lighting; + /// Current Pica command list struct { const u32* head_ptr; diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h new file mode 100644 index 000000000..ecf45654b --- /dev/null +++ b/src/video_core/pica_types.h @@ -0,0 +1,146 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstring> + +#include "common/common_types.h" + +namespace Pica { + +/** + * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision + * floating point. 
+ * + * When decoding, format is as follows: + * - The first `M` bits are the mantissa + * - The next `E` bits are the exponent + * - The last bit is the sign bit + * + * @todo Verify on HW if this conversion is sufficiently accurate. + */ +template<unsigned M, unsigned E> +struct Float { +public: + static Float<M, E> FromFloat32(float val) { + Float<M, E> ret; + ret.value = val; + return ret; + } + + static Float<M, E> FromRaw(u32 hex) { + Float<M, E> res; + + const int width = M + E + 1; + const int bias = 128 - (1 << (E - 1)); + const int exponent = (hex >> M) & ((1 << E) - 1); + const unsigned mantissa = hex & ((1 << M) - 1); + + if (hex & ((1 << (width - 1)) - 1)) + hex = ((hex >> (E + M)) << 31) | (mantissa << (23 - M)) | ((exponent + bias) << 23); + else + hex = ((hex >> (E + M)) << 31); + + std::memcpy(&res.value, &hex, sizeof(float)); + + return res; + } + + static Float<M, E> Zero() { + return FromFloat32(0.f); + } + + // Not recommended for anything but logging + float ToFloat32() const { + return value; + } + + Float<M, E> operator * (const Float<M, E>& flt) const { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + return Zero(); + return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32()); + } + + Float<M, E> operator / (const Float<M, E>& flt) const { + return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32()); + } + + Float<M, E> operator + (const Float<M, E>& flt) const { + return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32()); + } + + Float<M, E> operator - (const Float<M, E>& flt) const { + return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32()); + } + + Float<M, E>& operator *= (const Float<M, E>& flt) { + if ((this->value == 0.f && !std::isnan(flt.value)) || + (flt.value == 0.f && !std::isnan(this->value))) + // PICA gives 0 instead of NaN when multiplying by inf + *this = Zero(); + else value *= 
flt.ToFloat32(); + return *this; + } + + Float<M, E>& operator /= (const Float<M, E>& flt) { + value /= flt.ToFloat32(); + return *this; + } + + Float<M, E>& operator += (const Float<M, E>& flt) { + value += flt.ToFloat32(); + return *this; + } + + Float<M, E>& operator -= (const Float<M, E>& flt) { + value -= flt.ToFloat32(); + return *this; + } + + Float<M, E> operator - () const { + return Float<M, E>::FromFloat32(-ToFloat32()); + } + + bool operator < (const Float<M, E>& flt) const { + return ToFloat32() < flt.ToFloat32(); + } + + bool operator > (const Float<M, E>& flt) const { + return ToFloat32() > flt.ToFloat32(); + } + + bool operator >= (const Float<M, E>& flt) const { + return ToFloat32() >= flt.ToFloat32(); + } + + bool operator <= (const Float<M, E>& flt) const { + return ToFloat32() <= flt.ToFloat32(); + } + + bool operator == (const Float<M, E>& flt) const { + return ToFloat32() == flt.ToFloat32(); + } + + bool operator != (const Float<M, E>& flt) const { + return ToFloat32() != flt.ToFloat32(); + } + +private: + static const unsigned MASK = (1 << (M + E + 1)) - 1; + static const unsigned MANTISSA_MASK = (1 << M) - 1; + static const unsigned EXPONENT_MASK = (1 << E) - 1; + + // Stored as a regular float, merely for convenience + // TODO: Perform proper arithmetic on this! 
+ float value; +}; + +using float24 = Float<16, 7>; +using float20 = Float<12, 7>; +using float16 = Float<10, 5>; + +} // namespace Pica diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 291ef737d..b7d19bf94 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -36,7 +36,7 @@ static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { stage.GetAlphaMultiplier() == 1); } -RasterizerOpenGL::RasterizerOpenGL() : last_fb_color_addr(0), last_fb_depth_addr(0) { } +RasterizerOpenGL::RasterizerOpenGL() : cached_fb_color_addr(0), cached_fb_depth_addr(0) { } RasterizerOpenGL::~RasterizerOpenGL() { } void RasterizerOpenGL::InitObjects() { @@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); + + glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW); + SetShader(); // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation @@ -120,6 +126,19 @@ void RasterizerOpenGL::InitObjects() { glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0); + for (size_t i = 0; i < lighting_lut.size(); ++i) { + lighting_lut[i].Create(); + state.lighting_lut[i].texture_1d = lighting_lut[i].handle; + + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, 
state.lighting_lut[i].texture_1d); + + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } + state.Apply(); + ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE, "OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER)); } @@ -139,12 +158,34 @@ void RasterizerOpenGL::Reset() { res_cache.InvalidateAll(); } +/** + * This is a helper function to resolve an issue with opposite quaternions being interpolated by + * OpenGL. See below for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(-Q1, Q2) positive. + * + * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This + * should be correct for nearly all cases, however a more correct implementation (but less trivial + * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions + * manually using two Lerps, and doing this correction before each Lerp. 
+ */ +static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { + Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() }; + Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() }; + + return (Math::Dot(a, b) < 0.f); +} + void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0); - vertex_batch.emplace_back(v1); - vertex_batch.emplace_back(v2); + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); } void RasterizerOpenGL::DrawTriangles() { @@ -156,6 +197,13 @@ void RasterizerOpenGL::DrawTriangles() { state.draw.shader_dirty = false; } + for (unsigned index = 0; index < lighting_lut.size(); index++) { + if (uniform_block_data.lut_dirty[index]) { + SyncLightingLUT(index); + uniform_block_data.lut_dirty[index] = false; + } + } + if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); uniform_block_data.dirty = false; @@ -169,16 +217,14 @@ void RasterizerOpenGL::DrawTriangles() { // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = 
Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; - res_cache.InvalidateInRange(cur_fb_color_addr, cur_fb_color_size, true); - res_cache.InvalidateInRange(cur_fb_depth_addr, cur_fb_depth_size, true); + res_cache.InvalidateInRange(cached_fb_color_addr, cached_fb_color_size, true); + res_cache.InvalidateInRange(cached_fb_depth_addr, cached_fb_depth_size, true); } void RasterizerOpenGL::FlushFramebuffer() { @@ -285,44 +331,199 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(tev_combiner_buffer_color): SyncCombinerColor(); break; + + // Fragment lighting specular 0 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10): + SyncLightSpecular0(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10): + SyncLightSpecular0(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10): + SyncLightSpecular0(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10): + SyncLightSpecular0(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10): + SyncLightSpecular0(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10): + SyncLightSpecular0(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10): + SyncLightSpecular0(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10): + SyncLightSpecular0(7); + break; + + // Fragment lighting specular 1 color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10): + SyncLightSpecular1(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10): + 
SyncLightSpecular1(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10): + SyncLightSpecular1(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10): + SyncLightSpecular1(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10): + SyncLightSpecular1(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10): + SyncLightSpecular1(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10): + SyncLightSpecular1(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10): + SyncLightSpecular1(7); + break; + + // Fragment lighting diffuse color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10): + SyncLightDiffuse(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10): + SyncLightDiffuse(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10): + SyncLightDiffuse(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10): + SyncLightDiffuse(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10): + SyncLightDiffuse(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10): + SyncLightDiffuse(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10): + SyncLightDiffuse(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10): + SyncLightDiffuse(7); + break; + + // Fragment lighting ambient color + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10): + SyncLightAmbient(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10): + SyncLightAmbient(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10): + 
SyncLightAmbient(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10): + SyncLightAmbient(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10): + SyncLightAmbient(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10): + SyncLightAmbient(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10): + SyncLightAmbient(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10): + SyncLightAmbient(7); + break; + + // Fragment lighting position + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10): + SyncLightPosition(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10): + SyncLightPosition(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10): + SyncLightPosition(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10): + SyncLightPosition(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10): + SyncLightPosition(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10): + SyncLightPosition(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10): + SyncLightPosition(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 
0x145 + 7 * 0x10): + SyncLightPosition(7); + break; + + // Fragment lighting global ambient color (emission + ambient * ambient) + case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0): + SyncGlobalAmbient(); + break; + + // Fragment lighting lookup tables + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): + case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): + { + auto& lut_config = regs.lighting.lut_config; + uniform_block_data.lut_dirty[lut_config.type / 4] = true; + break; + } + } } void RasterizerOpenGL::FlushRegion(PAddr addr, u32 size) { const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; // If source memory region overlaps 3DS framebuffers, commit them before the copy happens - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, 
cached_fb_color_size)) CommitColorBuffer(); - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) CommitDepthBuffer(); } void RasterizerOpenGL::InvalidateRegion(PAddr addr, u32 size) { const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_color_size = Pica::Regs::BytesPerColorPixel(fb_color_texture.format) + * fb_color_texture.width * fb_color_texture.height; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) - * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + u32 cached_fb_depth_size = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format) + * fb_depth_texture.width * fb_depth_texture.height; // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_color_addr, cached_fb_color_size)) ReloadColorBuffer(); - if (MathUtil::IntervalsIntersect(addr, size, cur_fb_depth_addr, cur_fb_depth_size)) + if (MathUtil::IntervalsIntersect(addr, size, cached_fb_depth_addr, cached_fb_depth_size)) ReloadDepthBuffer(); // Notify cache of flush in case the region touches a cached resource @@ -497,27 +698,48 @@ void RasterizerOpenGL::SetShader() { uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); } + // Set the texture samplers to correspond to different lookup table texture units + GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); + if 
(uniform_lut != -1) { glUniform1i(uniform_lut, 3); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); } + uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); + if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } + current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); glUniformBlockBinding(current_shader->shader.handle, block_index, 0); - } - // Update uniforms - SyncAlphaTest(); - SyncCombinerColor(); - auto& tev_stages = Pica::g_state.regs.GetTevStages(); - for (int index = 0; index < tev_stages.size(); ++index) - SyncTevConstColor(index, tev_stages[index]); + // Update uniforms + SyncAlphaTest(); + SyncCombinerColor(); + auto& tev_stages = Pica::g_state.regs.GetTevStages(); + for (int index = 0; index < tev_stages.size(); ++index) + SyncTevConstColor(index, tev_stages[index]); + + SyncGlobalAmbient(); + for (int light_index = 0; light_index < 8; light_index++) { + SyncLightDiffuse(light_index); + SyncLightAmbient(light_index); + SyncLightPosition(light_index); + } + } } void RasterizerOpenGL::SyncFramebuffer() { const auto& regs = Pica::g_state.regs; - PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + PAddr new_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; - PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + 
PAddr new_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; bool fb_size_changed = fb_color_texture.width != static_cast<GLsizei>(regs.framebuffer.GetWidth()) || @@ -529,10 +751,10 @@ void RasterizerOpenGL::SyncFramebuffer() { bool depth_fb_prop_changed = fb_depth_texture.format != new_fb_depth_format || fb_size_changed; - bool color_fb_modified = last_fb_color_addr != cur_fb_color_addr || + bool color_fb_modified = cached_fb_color_addr != new_fb_color_addr || color_fb_prop_changed; - bool depth_fb_modified = last_fb_depth_addr != cur_fb_depth_addr || + bool depth_fb_modified = cached_fb_depth_addr != new_fb_depth_addr || depth_fb_prop_changed; // Commit if framebuffer modified in any way @@ -572,13 +794,13 @@ void RasterizerOpenGL::SyncFramebuffer() { // Load buffer data again if fb modified in any way if (color_fb_modified) { - last_fb_color_addr = cur_fb_color_addr; + cached_fb_color_addr = new_fb_color_addr; ReloadColorBuffer(); } if (depth_fb_modified) { - last_fb_depth_addr = cur_fb_depth_addr; + cached_fb_depth_addr = new_fb_depth_addr; ReloadDepthBuffer(); } @@ -610,8 +832,8 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncDepthModifiers() { - float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; // TODO: Implement scale modifier uniform_block_data.data.depth_offset = depth_offset; @@ -689,12 +911,81 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS } } +void RasterizerOpenGL::SyncGlobalAmbient() { + auto color = 
PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient); + if (color != uniform_block_data.data.lighting_global_ambient) { + uniform_block_data.data.lighting_global_ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { + std::array<GLvec4, 256> new_data; + + for (unsigned offset = 0; offset < new_data.size(); ++offset) { + new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); + new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); + new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); + new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); + } + + if (new_data != lighting_lut_data[lut_index]) { + lighting_lut_data[lut_index] = new_data; + glActiveTexture(GL_TEXTURE3 + lut_index); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data()); + } +} + +void RasterizerOpenGL::SyncLightSpecular0(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0); + if (color != uniform_block_data.data.light_src[light_index].specular_0) { + uniform_block_data.data.light_src[light_index].specular_0 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightSpecular1(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1); + if (color != uniform_block_data.data.light_src[light_index].specular_1) { + uniform_block_data.data.light_src[light_index].specular_1 = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDiffuse(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse); + if (color != uniform_block_data.data.light_src[light_index].diffuse) { + uniform_block_data.data.light_src[light_index].diffuse 
= color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightAmbient(int light_index) { + auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient); + if (color != uniform_block_data.data.light_src[light_index].ambient) { + uniform_block_data.data.light_src[light_index].ambient = color; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightPosition(int light_index) { + GLvec3 position = { + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(), + Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() }; + + if (position != uniform_block_data.data.light_src[light_index].position) { + uniform_block_data.data.light_src[light_index].position = position; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncDrawState() { const auto& regs = Pica::g_state.regs; // Sync the viewport - GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; + GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2; // OpenGL uses different y coordinates, so negate corner offset and flip origin // TODO: Ensure viewport_corner.x should not be negated or origin flipped @@ -723,7 +1014,7 @@ void RasterizerOpenGL::SyncDrawState() { MICROPROFILE_DEFINE(OpenGL_FramebufferReload, "OpenGL", "FB Reload", MP_RGB(70, 70, 200)); void RasterizerOpenGL::ReloadColorBuffer() { - u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); + u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); if (color_buffer == 
nullptr) return; @@ -758,13 +1049,11 @@ void RasterizerOpenGL::ReloadColorBuffer() { } void RasterizerOpenGL::ReloadDepthBuffer() { - PAddr depth_buffer_addr = Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress(); - - if (depth_buffer_addr == 0) + if (cached_fb_depth_addr == 0) return; // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil - u8* depth_buffer = Memory::GetPhysicalPointer(depth_buffer_addr); + u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); if (depth_buffer == nullptr) return; @@ -827,8 +1116,8 @@ Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit"); MICROPROFILE_DEFINE(OpenGL_FramebufferCommit, "OpenGL", "FB Commit", MP_RGB(70, 70, 200)); void RasterizerOpenGL::CommitColorBuffer() { - if (last_fb_color_addr != 0) { - u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr); + if (cached_fb_color_addr != 0) { + u8* color_buffer = Memory::GetPhysicalPointer(cached_fb_color_addr); if (color_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); @@ -863,9 +1152,9 @@ void RasterizerOpenGL::CommitColorBuffer() { } void RasterizerOpenGL::CommitDepthBuffer() { - if (last_fb_depth_addr != 0) { + if (cached_fb_depth_addr != 0) { // TODO: Output seems correct visually, but doesn't quite match sw renderer output. One of them is wrong. 
- u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr); + u8* depth_buffer = Memory::GetPhysicalPointer(cached_fb_depth_addr); if (depth_buffer != nullptr) { Common::Profiling::ScopeTimer timer(buffer_commit_category); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index c8a2d8f16..fef5f5331 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,6 +17,7 @@ #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/shader/shader_interpreter.h" /** @@ -71,6 +72,59 @@ struct PicaShaderConfig { regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; + // Fragment lighting + + res.lighting.enable = !regs.lighting.disable; + res.lighting.src_num = regs.lighting.num_lights + 1; + + for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { + unsigned num = regs.lighting.light_enable.GetNum(light_index); + const auto& light = regs.lighting.light[num]; + res.lighting.light[light_index].num = num; + res.lighting.light[light_index].directional = light.directional != 0; + res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); + res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); + } + + res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + 
res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + + res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + + res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + + res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + + res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + + res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; + res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + + res.lighting.config = regs.lighting.config; + res.lighting.fresnel_selector = regs.lighting.fresnel_selector; + res.lighting.bump_mode = regs.lighting.bump_mode; + res.lighting.bump_selector = regs.lighting.bump_selector; + res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; + res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; + return res; } @@ 
-86,9 +140,37 @@ struct PicaShaderConfig { return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func; + Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {}; - u8 combiner_buffer_input; + u8 combiner_buffer_input = 0; + + struct { + struct { + unsigned num = 0; + bool directional = false; + bool two_sided_diffuse = false; + bool dist_atten_enable = false; + GLfloat dist_atten_scale = 0.0f; + GLfloat dist_atten_bias = 0.0f; + } light[8]; + + bool enable = false; + unsigned src_num = 0; + Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; + unsigned bump_selector = 0; + bool bump_renorm = false; + bool clamp_highlights = false; + + Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; + Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; + + struct { + bool enable = false; + bool abs_input = false; + Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; + float scale = 1.0f; + } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; }; namespace std { @@ -167,7 +249,7 @@ private: /// Structure that the hardware rendered vertices are composed of struct HardwareVertex { - HardwareVertex(const Pica::Shader::OutputVertex& v) { + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { position[0] = v.pos.x.ToFloat32(); position[1] = v.pos.y.ToFloat32(); position[2] = v.pos.z.ToFloat32(); @@ -182,6 +264,19 @@ private: tex_coord1[1] = v.tc1.y.ToFloat32(); tex_coord2[0] = v.tc2.x.ToFloat32(); tex_coord2[1] = v.tc2.y.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + for (float& 
x : normquat) { + x = -x; + } + } } GLfloat position[4]; @@ -189,20 +284,31 @@ private: GLfloat tex_coord0[2]; GLfloat tex_coord1[2]; GLfloat tex_coord2[2]; + GLfloat normquat[4]; + GLfloat view[3]; + }; + + struct LightSrc { + alignas(16) GLvec3 specular_0; + alignas(16) GLvec3 specular_1; + alignas(16) GLvec3 diffuse; + alignas(16) GLvec3 ambient; + alignas(16) GLvec3 position; }; /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned struct UniformData { // A vec4 color for each of the six tev stages - std::array<GLfloat, 4> const_color[6]; - std::array<GLfloat, 4> tev_combiner_buffer_color; + GLvec4 const_color[6]; + GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; GLfloat depth_offset; - INSERT_PADDING_BYTES(8); + alignas(16) GLvec3 lighting_global_ambient; + LightSrc light_src[8]; }; - static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader"); - static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec"); + static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Reconfigure the OpenGL color texture to use the given format and dimensions void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height); @@ -249,6 +355,27 @@ private: /// Syncs the TEV combiner color buffer to match the PICA register void SyncCombinerColor(); + /// Syncs the lighting global ambient color to match the PICA register + void SyncGlobalAmbient(); + + /// Syncs the lighting lookup tables + void SyncLightingLUT(unsigned index); + + /// Syncs the specified light's diffuse color to match the PICA register + void SyncLightDiffuse(int light_index); + + /// Syncs the specified light's 
ambient color to match the PICA register + void SyncLightAmbient(int light_index); + + /// Syncs the specified light's position to match the PICA register + void SyncLightPosition(int light_index); + + /// Syncs the specified light's specular 0 color to match the PICA register + void SyncLightSpecular0(int light_index); + + /// Syncs the specified light's specular 1 color to match the PICA register + void SyncLightSpecular1(int light_index); + /// Syncs the remaining OpenGL drawing state to match the current PICA state void SyncDrawState(); @@ -278,8 +405,8 @@ private: OpenGLState state; - PAddr last_fb_color_addr; - PAddr last_fb_depth_addr; + PAddr cached_fb_color_addr; + PAddr cached_fb_depth_addr; // Hardware rasterizer std::array<SamplerInfo, 3> texture_samplers; @@ -291,6 +418,7 @@ private: struct { UniformData data; + bool lut_dirty[6]; bool dirty; } uniform_block_data; @@ -298,4 +426,7 @@ private: OGLBuffer vertex_buffer; OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; + + std::array<OGLTexture, 6> lighting_lut; + std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 22022f7f4..ee4b54ab9 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,12 +32,10 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "primary_color"; break; case Source::PrimaryFragmentColor: - // HACK: Until we implement fragment lighting, use primary_color - out += "primary_color"; + out += "primary_fragment_color"; break; case Source::SecondaryFragmentColor: - // HACK: Until we implement fragment lighting, use zero - out += "vec4(0.0)"; + out += "secondary_fragment_color"; break; case Source::Texture0: out += "texture(tex[0], texcoord[0])"; @@ -320,26 +318,229 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += 
"next_combiner_buffer.a = last_tex_env_out.a;\n"; } +/// Writes the code to emulate fragment lighting +static void WriteLighting(std::string& out, const PicaShaderConfig& config) { + // Define lighting globals + out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" + "vec3 light_vector = vec3(0.0);\n" + "vec3 refl_value = vec3(0.0);\n"; + + // Compute fragment normals + if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { + // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture + std::string bump_selector = std::to_string(config.lighting.bump_selector); + out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; + + // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result + if (config.lighting.bump_renorm) { + std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; + out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; + } + } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { + // Bump mapping is enabled using a tangent map + LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); + UNIMPLEMENTED(); + } else { + // No bump mapping - surface local normal is just a unit normal + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + } + + // Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace + out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; + + // Gets the index into the specified lookup table for specular lighting + auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { + const std::string half_angle = "normalize(normalize(view) + light_vector)"; + std::string index; + switch (input) { + case Regs::LightingLutInput::NH: + 
index = "dot(normal, " + half_angle + ")"; + break; + + case Regs::LightingLutInput::VH: + index = std::string("dot(normalize(view), " + half_angle + ")"); + break; + + case Regs::LightingLutInput::NV: + index = std::string("dot(normal, normalize(view))"); + break; + + case Regs::LightingLutInput::LN: + index = std::string("dot(light_vector, normal)"); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); + UNIMPLEMENTED(); + break; + } + + if (abs) { + // LUT index is in the range of (0.0, 1.0) + index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; + } else { + // LUT index is in the range of (-1.0, 1.0) + index = "clamp(" + index + ", -1.0, 1.0)"; + return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)"; + } + + return std::string(); + }; + + // Gets the lighting lookup table value given the specified sampler and index + auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) { + return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + + lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); + }; + + // Write the code to emulate each enabled light + for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { + const auto& light_config = config.lighting.light[light_index]; + std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; + + // Compute light vector (directional or positional) + if (light_config.directional) + out += "light_vector = normalize(" + light_src + ".position);\n"; + else + out += "light_vector = normalize(" + light_src + ".position + view);\n"; + + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided + std::string dot_product = light_config.two_sided_diffuse ? 
"abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; + + // If enabled, compute distance attenuation value + std::string dist_atten = "1.0"; + if (light_config.dist_atten_enable) { + std::string scale = std::to_string(light_config.dist_atten_scale); + std::string bias = std::to_string(light_config.dist_atten_bias); + std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; + index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; + const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); + dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); + } + + // If enabled, clamp specular component if lighting result is negative + std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + + // Specular 0 component + std::string d0_lut_value = "1.0"; + if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + // Lookup specular "distribution 0" LUT value + std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; + } + std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; + + // If enabled, lookup ReflectRed value, otherwise, 1.0 is used + if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + out += "refl_value.r = " + value + ";\n"; + } else 
{ + out += "refl_value.r = 1.0;\n"; + } + + // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used + if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + out += "refl_value.g = " + value + ";\n"; + } else { + out += "refl_value.g = refl_value.r;\n"; + } + + // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used + if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + out += "refl_value.b = " + value + ";\n"; + } else { + out += "refl_value.b = refl_value.r;\n"; + } + + // Specular 1 component + std::string d1_lut_value = "1.0"; + if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + // Lookup specular "distribution 1" LUT value + std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; + } + std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; + + // Fresnel + if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, 
Pica::Regs::LightingSampler::Fresnel)) { + // Lookup fresnel LUT value + std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; + + // Enabled for difffuse lighting alpha component + if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + out += "diffuse_sum.a *= " + value + ";\n"; + + // Enabled for the specular lighting alpha component + if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || + config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + out += "specular_sum.a *= " + value + ";\n"; + } + + // Compute primary fragment color (diffuse lighting) function + out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n"; + + // Compute secondary fragment color (specular lighting) function + out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n"; + } + + // Sum final lighting result + out += "diffuse_sum.rgb += lighting_global_ambient;\n"; + out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n"; + out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n"; +} + std::string GenerateFragmentShader(const PicaShaderConfig& config) { std::string out = R"( #version 330 core #define NUM_TEV_STAGES 6 +#define NUM_LIGHTS 8 +#define LIGHTING_LUT_SIZE 256 +#define FLOAT_255 (255.0 / 256.0) in vec4 primary_color; in vec2 texcoord[3]; +in vec4 normquat; +in vec3 view; out vec4 color; +struct LightSrc { + vec3 specular_0; + vec3 specular_1; + vec3 diffuse; + vec3 ambient; + vec3 position; +}; + layout (std140) 
uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; float depth_offset; + vec3 lighting_global_ambient; + LightSrc light_src[NUM_LIGHTS]; }; uniform sampler2D tex[3]; +uniform sampler1D lut[6]; + +// Rotate the vector v by the quaternion q +vec3 quaternion_rotate(vec4 q, vec3 v) { + return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); +} void main() { +vec4 primary_fragment_color = vec4(0.0); +vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test @@ -348,6 +549,9 @@ void main() { return out; } + if (config.lighting.enable) + WriteLighting(out, config); + out += "vec4 combiner_buffer = vec4(0.0);\n"; out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; out += "vec4 last_tex_env_out = vec4(0.0);\n"; @@ -369,21 +573,28 @@ void main() { std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; out vec2 texcoord[3]; +out vec4 normquat; +out vec3 view; void main() { primary_color = vert_color; texcoord[0] = vert_texcoord0; texcoord[1] = vert_texcoord1; texcoord[2] = vert_texcoord2; + normquat = vert_normquat; + view = vert_view; gl_Position = vec4(vert_position.x, 
vert_position.y, -vert_position.z, vert_position.w); } )"; diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 046aae14f..097242f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -14,6 +14,8 @@ enum Attributes { ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_NORMQUAT, + ATTRIBUTE_VIEW, }; /** diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index a82372995..08e4d0b54 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -48,6 +48,10 @@ OpenGLState::OpenGLState() { texture_unit.sampler = 0; } + for (auto& lut : lighting_lut) { + lut.texture_1d = 0; + } + draw.framebuffer = 0; draw.vertex_array = 0; draw.vertex_buffer = 0; @@ -170,6 +174,14 @@ void OpenGLState::Apply() { } } + // Lighting LUTs + for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) { + if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) { + glActiveTexture(GL_TEXTURE3 + i); + glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d); + } + } + // Framebuffer if (draw.framebuffer != cur_state.draw.framebuffer) { glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index b8ab45bb8..e848058d7 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -62,6 +62,10 @@ public: } texture_units[3]; struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } lighting_lut[6]; + + struct { GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING GLuint vertex_buffer; // GL_ARRAY_BUFFER_BINDING diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 04c1d1a34..3d6c4e9e5 100644 --- 
a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -10,6 +10,9 @@ #include "video_core/pica.h" +using GLvec3 = std::array<GLfloat, 3>; +using GLvec4 = std::array<GLfloat, 4>; + namespace PicaToGL { inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { @@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) { return stencil_op_table[(unsigned)action]; } -inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { +inline GLvec4 ColorRGBA8(const u32 color) { return { { (color >> 0 & 0xFF) / 255.0f, (color >> 8 & 0xFF) / 255.0f, (color >> 16 & 0xFF) / 255.0f, @@ -183,4 +186,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) { } }; } +inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) { + return { { color.r / 255.0f, + color.g / 255.0f, + color.b / 255.0f + } }; +} + } // namespace diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a6a38f0af..ca3a6a6b4 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -81,8 +81,8 @@ struct ScreenRectVertex { * The projection part of the matrix is trivial, hence these operations are represented * by a 3x2 matrix. 
*/ -static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) { - std::array<GLfloat, 3*2> matrix; +static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) { + std::array<GLfloat, 3 * 2> matrix; matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 59f54236b..44c234ed8 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } - LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", + LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), + ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); return ret; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 1c6fa592c..f068cd93f 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -37,17 +37,19 @@ struct OutputVertex { Math::Vec4<float24> color; Math::Vec2<float24> tc0; Math::Vec2<float24> tc1; - float24 pad[6]; + INSERT_PADDING_WORDS(2); + Math::Vec3<float24> view; + INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; // Padding 
for optimal alignment - float24 pad2[4]; + INSERT_PADDING_WORDS(4); // Attributes used to store intermediate results // position after perspective divide Math::Vec3<float24> screenpos; - float24 pad3; + INSERT_PADDING_WORDS(1); // Linear interpolation // factor: 0=this, 1=vtx |