diff options
-rw-r--r-- | appveyor.yml | 28 | ||||
-rw-r--r-- | src/common/logging/log.h | 2 | ||||
-rw-r--r-- | src/core/arm/arm_interface.h | 9 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom.cpp | 10 | ||||
-rw-r--r-- | src/core/arm/dyncom/arm_dyncom.h | 57 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp.cpp | 4 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfp_helper.h | 654 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpdouble.cpp | 187 | ||||
-rw-r--r-- | src/core/arm/skyeye_common/vfp/vfpsingle.cpp | 54 | ||||
-rw-r--r-- | src/core/hle/kernel/kernel.cpp | 6 | ||||
-rw-r--r-- | src/core/hle/kernel/mutex.cpp | 33 | ||||
-rw-r--r-- | src/core/hle/kernel/mutex.h | 3 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 360 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.h | 115 | ||||
-rw-r--r-- | src/core/hle/svc.cpp | 22 |
15 files changed, 648 insertions, 896 deletions
diff --git a/appveyor.yml b/appveyor.yml index 06c9a7909..c9edb9e19 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,28 +2,28 @@ os: unstable # shallow clone -clone_depth: 1 +clone_depth: 5 environment: - QTDIR: C:\Qt\5.4\msvc2013_opengl + QTDIR: C:\Qt\5.4\msvc2013_64_opengl MEGA_EMAIL: secure: rEo9CGAYX87GKTqZCZ9vLCNCNqxO5JLgbERaHF3YJWg= MEGA_PASSWORD: secure: zE1zmgjS/6GfN/19ROl/O0fVR58svORQ5gdtsxI7J8k= platform: - - Win32 + - x64 configuration: - Release install: - - git submodule update --init --recursive + - git submodule update --init --recursive --depth 20 before_build: - mkdir build - cd build - - cmake .. + - cmake -G "Visual Studio 12 Win64" .. - cd .. after_build: @@ -32,16 +32,16 @@ after_build: - wget -q http://megatools.megous.com/builds/megatools-1.9.94-win64.zip # extract megatools silently. See http://stackoverflow.com/a/11629736/1748450 - 7z x megatools-1.9.94-win64.zip | FIND /V "ing " - # copy the qt dlls - - copy C:\Qt\5.4\msvc2013_opengl\bin\icudt53.dll build\bin\release - - copy C:\Qt\5.4\msvc2013_opengl\bin\icuin53.dll build\bin\release - - copy C:\Qt\5.4\msvc2013_opengl\bin\icuuc53.dll build\bin\release - - copy C:\Qt\5.4\msvc2013_opengl\bin\Qt5Core.dll build\bin\release - - copy C:\Qt\5.4\msvc2013_opengl\bin\Qt5Gui.dll build\bin\release - - copy C:\Qt\5.4\msvc2013_opengl\bin\Qt5OpenGL.dll build\bin\release - - copy C:\Qt\5.4\msvc2013_opengl\bin\Qt5Widgets.dll build\bin\release + # copy the qt dlls + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\icudt53.dll build\bin\release + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\icuin53.dll build\bin\release + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\icuuc53.dll build\bin\release + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\Qt5Core.dll build\bin\release + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\Qt5Gui.dll build\bin\release + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\Qt5OpenGL.dll build\bin\release + - copy C:\Qt\5.4\msvc2013_64_opengl\bin\Qt5Widgets.dll build\bin\release - mkdir build\bin\release\platforms\ - - copy C:\Qt\5.4\msvc2013_opengl\plugins\platforms\qwindows.dll build\bin\release\platforms + - copy C:\Qt\5.4\msvc2013_64_opengl\plugins\platforms\qwindows.dll build\bin\release\platforms # zip up the build folder -> build.7z - 7z a build .\build\bin\release\* # rename, upload to Mega diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 3d94bf0d9..897ef36b8 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -32,7 +32,7 @@ typedef u8 ClassType; /** * Specifies the sub-system that generated the log message. * - * @note If you add a new entry here, also add a corresponding one to `ALL_LOG_CLASSES` in log.cpp. + * @note If you add a new entry here, also add a corresponding one to `ALL_LOG_CLASSES` in backend.cpp. */ enum class Class : ClassType { Log, ///< Messages about the log system itself diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h index e612f7439..ef37ee055 100644 --- a/src/core/arm/arm_interface.h +++ b/src/core/arm/arm_interface.h @@ -86,6 +86,15 @@ public: virtual void AddTicks(u64 ticks) = 0; /** + * Initializes a CPU context for use on this CPU + * @param context Thread context to reset + * @param stack_top Pointer to the top of the stack + * @param entry_point Entry point for execution + * @param arg User argument for thread + */ + virtual void ResetContext(Core::ThreadContext& context, u32 stack_top, u32 entry_point, u32 arg) = 0; + + /** * Saves the current CPU context * @param ctx Thread context to save */ diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index f6628ca33..68fddc94f 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -93,6 +93,16 @@ void ARM_DynCom::ExecuteInstructions(int num_instructions) { AddTicks(ticks_executed); } +void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 entry_point, u32 arg) { + memset(&context, 0, sizeof(Core::ThreadContext)); + + context.cpu_registers[0] = arg; + context.pc = entry_point; + context.sp = stack_top; + context.cpsr = 0x1F; // Usermode + context.mode = 8; // Instructs dyncom CPU core to start execution as if it's "resuming" a thread. +} + void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { memcpy(ctx.cpu_registers, state->Reg, sizeof(ctx.cpu_registers)); memcpy(ctx.fpu_registers, state->ExtReg, sizeof(ctx.fpu_registers)); diff --git a/src/core/arm/dyncom/arm_dyncom.h b/src/core/arm/dyncom/arm_dyncom.h index f16fb070c..9e2dda843 100644 --- a/src/core/arm/dyncom/arm_dyncom.h +++ b/src/core/arm/dyncom/arm_dyncom.h @@ -13,79 +13,24 @@ class ARM_DynCom final : virtual public ARM_Interface { public: - ARM_DynCom(); ~ARM_DynCom(); - /** - * Set the Program Counter to an address - * @param pc Address to set PC to - */ void SetPC(u32 pc) override; - - /* - * Get the current Program Counter - * @return Returns current PC - */ u32 GetPC() const override; - - /** - * Get an ARM register - * @param index Register index (0-15) - * @return Returns the value in the register - */ u32 GetReg(int index) const override; - - /** - * Set an ARM register - * @param index Register index (0-15) - * @param value Value to set register to - */ void SetReg(int index, u32 value) override; - - /** - * Get the current CPSR register - * @return Returns the value of the CPSR register - */ u32 GetCPSR() const override; - - /** - * Set the current CPSR register - * @param cpsr Value to set CPSR to - */ void SetCPSR(u32 cpsr) override; - /** - * Returns the number of clock ticks since the last reset - * @return Returns number of clock ticks - */ u64 GetTicks() const override; - - /** - * Advance the CPU core by the specified number of ticks (e.g. to simulate CPU execution time) - * @param ticks Number of ticks to advance the CPU core - */ void AddTicks(u64 ticks) override; - /** - * Saves the current CPU context - * @param ctx Thread context to save - */ + void ResetContext(Core::ThreadContext& context, u32 stack_top, u32 entry_point, u32 arg); void SaveContext(Core::ThreadContext& ctx) override; - - /** - * Loads a CPU context - * @param ctx Thread context to load - */ void LoadContext(const Core::ThreadContext& ctx) override; - /// Prepare core for thread reschedule (if needed to correctly handle state) void PrepareReschedule() override; - - /** - * Executes the given number of instructions - * @param num_instructions Number of instructions to executes - */ void ExecuteInstructions(int num_instructions) override; private: diff --git a/src/core/arm/skyeye_common/vfp/vfp.cpp b/src/core/arm/skyeye_common/vfp/vfp.cpp index 888709124..1cf146c53 100644 --- a/src/core/arm/skyeye_common/vfp/vfp.cpp +++ b/src/core/arm/skyeye_common/vfp/vfp.cpp @@ -773,8 +773,8 @@ void vfp_raise_exceptions(ARMul_State* state, u32 exceptions, u32 inst, u32 fpsc * Comparison instructions always return at least one of * these flags set. */ - if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V)) - fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V); + if (exceptions & (FPSCR_NFLAG|FPSCR_ZFLAG|FPSCR_CFLAG|FPSCR_VFLAG)) + fpscr &= ~(FPSCR_NFLAG|FPSCR_ZFLAG|FPSCR_CFLAG|FPSCR_VFLAG); fpscr |= exceptions; diff --git a/src/core/arm/skyeye_common/vfp/vfp_helper.h b/src/core/arm/skyeye_common/vfp/vfp_helper.h index 581f0358f..b68090b80 100644 --- a/src/core/arm/skyeye_common/vfp/vfp_helper.h +++ b/src/core/arm/skyeye_common/vfp/vfp_helper.h @@ -45,444 +45,400 @@ #define do_div(n, base) {n/=base;} -/* From vfpinstr.h */ - -#define INST_CPRTDO(inst) (((inst) & 0x0f000000) == 0x0e000000) -#define INST_CPRT(inst) ((inst) & (1 << 4)) -#define INST_CPRT_L(inst) ((inst) & (1 << 20)) -#define INST_CPRT_Rd(inst) (((inst) & (15 << 12)) >> 12) -#define INST_CPRT_OP(inst) (((inst) >> 21) & 7) -#define INST_CPNUM(inst) ((inst) & 0xf00) -#define CPNUM(cp) ((cp) << 8) - -#define FOP_MASK (0x00b00040) -#define FOP_FMAC (0x00000000) -#define FOP_FNMAC (0x00000040) -#define FOP_FMSC (0x00100000) -#define FOP_FNMSC (0x00100040) -#define FOP_FMUL (0x00200000) -#define FOP_FNMUL (0x00200040) -#define FOP_FADD (0x00300000) -#define FOP_FSUB (0x00300040) -#define FOP_FDIV (0x00800000) -#define FOP_EXT (0x00b00040) - -#define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) - -#define FEXT_MASK (0x000f0080) -#define FEXT_FCPY (0x00000000) -#define FEXT_FABS (0x00000080) -#define FEXT_FNEG (0x00010000) -#define FEXT_FSQRT (0x00010080) -#define FEXT_FCMP (0x00040000) -#define FEXT_FCMPE (0x00040080) -#define FEXT_FCMPZ (0x00050000) -#define FEXT_FCMPEZ (0x00050080) -#define FEXT_FCVT (0x00070080) -#define FEXT_FUITO (0x00080000) -#define FEXT_FSITO (0x00080080) -#define FEXT_FTOUI (0x000c0000) -#define FEXT_FTOUIZ (0x000c0080) -#define FEXT_FTOSI (0x000d0000) -#define FEXT_FTOSIZ (0x000d0080) - -#define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) - -#define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) -#define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) -#define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) -#define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) -#define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) -#define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) - -#define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) - -#define FPSCR_N (1 << 31) -#define FPSCR_Z (1 << 30) -#define FPSCR_C (1 << 29) -#define FPSCR_V (1 << 28) +enum : u32 { + FOP_MASK = 0x00b00040, + FOP_FMAC = 0x00000000, + FOP_FNMAC = 0x00000040, + FOP_FMSC = 0x00100000, + FOP_FNMSC = 0x00100040, + FOP_FMUL = 0x00200000, + FOP_FNMUL = 0x00200040, + FOP_FADD = 0x00300000, + FOP_FSUB = 0x00300040, + FOP_FDIV = 0x00800000, + FOP_EXT = 0x00b00040 +}; + +#define FOP_TO_IDX(inst) ((inst & 0x00b00000) >> 20 | (inst & (1 << 6)) >> 4) + +enum : u32 { + FEXT_MASK = 0x000f0080, + FEXT_FCPY = 0x00000000, + FEXT_FABS = 0x00000080, + FEXT_FNEG = 0x00010000, + FEXT_FSQRT = 0x00010080, + FEXT_FCMP = 0x00040000, + FEXT_FCMPE = 0x00040080, + FEXT_FCMPZ = 0x00050000, + FEXT_FCMPEZ = 0x00050080, + FEXT_FCVT = 0x00070080, + FEXT_FUITO = 0x00080000, + FEXT_FSITO = 0x00080080, + FEXT_FTOUI = 0x000c0000, + FEXT_FTOUIZ = 0x000c0080, + FEXT_FTOSI = 0x000d0000, + FEXT_FTOSIZ = 0x000d0080 +}; + +#define FEXT_TO_IDX(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) + +#define vfp_get_sd(inst) ((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22) +#define vfp_get_dd(inst) ((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18) +#define vfp_get_sm(inst) ((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5) +#define vfp_get_dm(inst) ((inst & 0x0000000f) | (inst & (1 << 5)) >> 1) +#define vfp_get_sn(inst) ((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7) +#define vfp_get_dn(inst) ((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3) + +#define vfp_single(inst) (((inst) & 0x0000f00) == 0xa00) static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift) { - if (shift) { - if (shift < 32) - val = val >> shift | ((val << (32 - shift)) != 0); - else - val = val != 0; - } - return val; + if (shift) { + if (shift < 32) + val = val >> shift | ((val << (32 - shift)) != 0); + else + val = val != 0; + } + return val; } static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift) { - if (shift) { - if (shift < 64) - val = val >> shift | ((val << (64 - shift)) != 0); - else - val = val != 0; - } - return val; + if (shift) { + if (shift < 64) + val = val >> shift | ((val << (64 - shift)) != 0); + else + val = val != 0; + } + return val; } static inline u32 vfp_hi64to32jamming(u64 val) { - u32 v; - u32 highval = val >> 32; - u32 lowval = val & 0xffffffff; + u32 v; + u32 highval = val >> 32; + u32 lowval = val & 0xffffffff; - if (lowval >= 1) - v = highval | 1; - else - v = highval; + if (lowval >= 1) + v = highval | 1; + else + v = highval; - return v; + return v; } -static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) +static inline void add128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml) { - *resl = nl + ml; - *resh = nh + mh; - if (*resl < nl) - *resh += 1; + *resl = nl + ml; + *resh = nh + mh; + if (*resl < nl) + *resh += 1; } -static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml) +static inline void sub128(u64* resh, u64* resl, u64 nh, u64 nl, u64 mh, u64 ml) { - *resl = nl - ml; - *resh = nh - mh; - if (*resl > nl) - *resh -= 1; + *resl = nl - ml; + *resh = nh - mh; + if (*resl > nl) + *resh -= 1; } -static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m) +static inline void mul64to128(u64* resh, u64* resl, u64 n, u64 m) { - u32 nh, nl, mh, ml; - u64 rh, rma, rmb, rl; + u32 nh, nl, mh, ml; + u64 rh, rma, rmb, rl; - nl = n; - ml = m; - rl = (u64)nl * ml; + nl = n; + ml = m; + rl = (u64)nl * ml; - nh = n >> 32; - rma = (u64)nh * ml; + nh = n >> 32; + rma = (u64)nh * ml; - mh = m >> 32; - rmb = (u64)nl * mh; - rma += rmb; + mh = m >> 32; + rmb = (u64)nl * mh; + rma += rmb; - rh = (u64)nh * mh; - rh += ((u64)(rma < rmb) << 32) + (rma >> 32); + rh = (u64)nh * mh; + rh += ((u64)(rma < rmb) << 32) + (rma >> 32); - rma <<= 32; - rl += rma; - rh += (rl < rma); + rma <<= 32; + rl += rma; + rh += (rl < rma); - *resl = rl; - *resh = rh; + *resl = rl; + *resh = rh; } -static inline void shift64left(u64 *resh, u64 *resl, u64 n) +static inline void shift64left(u64* resh, u64* resl, u64 n) { - *resh = n >> 63; - *resl = n << 1; + *resh = n >> 63; + *resl = n << 1; } static inline u64 vfp_hi64multiply64(u64 n, u64 m) { - u64 rh, rl; - mul64to128(&rh, &rl, n, m); - return rh | (rl != 0); + u64 rh, rl; + mul64to128(&rh, &rl, n, m); + return rh | (rl != 0); } static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m) { - u64 mh, ml, remh, reml, termh, terml, z; - - if (nh >= m) - return ~0ULL; - mh = m >> 32; - if (mh << 32 <= nh) { - z = 0xffffffff00000000ULL; - } else { - z = nh; - do_div(z, mh); - z <<= 32; - } - mul64to128(&termh, &terml, m, z); - sub128(&remh, &reml, nh, nl, termh, terml); - ml = m << 32; - while ((s64)remh < 0) { - z -= 0x100000000ULL; - add128(&remh, &reml, remh, reml, mh, ml); - } - remh = (remh << 32) | (reml >> 32); - if (mh << 32 <= remh) { - z |= 0xffffffff; - } else { - do_div(remh, mh); - z |= remh; - } - return z; + u64 mh, ml, remh, reml, termh, terml, z; + + if (nh >= m) + return ~0ULL; + mh = m >> 32; + if (mh << 32 <= nh) { + z = 0xffffffff00000000ULL; + } else { + z = nh; + do_div(z, mh); + z <<= 32; + } + mul64to128(&termh, &terml, m, z); + sub128(&remh, &reml, nh, nl, termh, terml); + ml = m << 32; + while ((s64)remh < 0) { + z -= 0x100000000ULL; + add128(&remh, &reml, remh, reml, mh, ml); + } + remh = (remh << 32) | (reml >> 32); + if (mh << 32 <= remh) { + z |= 0xffffffff; + } else { + do_div(remh, mh); + z |= remh; + } + return z; } -/* - * Operations on unpacked elements - */ -#define vfp_sign_negate(sign) (sign ^ 0x8000) +// Operations on unpacked elements +#define vfp_sign_negate(sign) (sign ^ 0x8000) -/* - * Single-precision - */ +// Single-precision struct vfp_single { - s16 exponent; - u16 sign; - u32 significand; + s16 exponent; + u16 sign; + u32 significand; }; -/* - * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa - * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent - * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand - * which are not propagated to the float upon packing. - */ -#define VFP_SINGLE_MANTISSA_BITS (23) -#define VFP_SINGLE_EXPONENT_BITS (8) -#define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) -#define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) +// VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa +// VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent +// VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand +// which are not propagated to the float upon packing. +#define VFP_SINGLE_MANTISSA_BITS (23) +#define VFP_SINGLE_EXPONENT_BITS (8) +#define VFP_SINGLE_LOW_BITS (32 - VFP_SINGLE_MANTISSA_BITS - 2) +#define VFP_SINGLE_LOW_BITS_MASK ((1 << VFP_SINGLE_LOW_BITS) - 1) -/* - * The bit in an unpacked float which indicates that it is a quiet NaN - */ +// The bit in an unpacked float which indicates that it is a quiet NaN #define VFP_SINGLE_SIGNIFICAND_QNAN (1 << (VFP_SINGLE_MANTISSA_BITS - 1 + VFP_SINGLE_LOW_BITS)) -/* - * Operations on packed single-precision numbers - */ -#define vfp_single_packed_sign(v) ((v) & 0x80000000) -#define vfp_single_packed_negate(v) ((v) ^ 0x80000000) -#define vfp_single_packed_abs(v) ((v) & ~0x80000000) -#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) -#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) - -/* - * Unpack a single-precision float. Note that this returns the magnitude - * of the single-precision float mantissa with the 1. if necessary, - * aligned to bit 30. - */ -static inline void vfp_single_unpack(struct vfp_single *s, s32 val) +// Operations on packed single-precision numbers +#define vfp_single_packed_sign(v) ((v) & 0x80000000) +#define vfp_single_packed_negate(v) ((v) ^ 0x80000000) +#define vfp_single_packed_abs(v) ((v) & ~0x80000000) +#define vfp_single_packed_exponent(v) (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1)) +#define vfp_single_packed_mantissa(v) ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1)) + +// Unpack a single-precision float. Note that this returns the magnitude +// of the single-precision float mantissa with the 1. if necessary, +// aligned to bit 30. +static inline void vfp_single_unpack(vfp_single* s, s32 val) { - u32 significand; + u32 significand; - s->sign = vfp_single_packed_sign(val) >> 16, - s->exponent = vfp_single_packed_exponent(val); + s->sign = vfp_single_packed_sign(val) >> 16, + s->exponent = vfp_single_packed_exponent(val); - significand = (u32) val; - significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; - if (s->exponent && s->exponent != 255) - significand |= 0x40000000; - s->significand = significand; + significand = (u32) val; + significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2; + if (s->exponent && s->exponent != 255) + significand |= 0x40000000; + s->significand = significand; } -/* - * Re-pack a single-precision float. This assumes that the float is - * already normalised such that the MSB is bit 30, _not_ bit 31. - */ -static inline s32 vfp_single_pack(struct vfp_single *s) +// Re-pack a single-precision float. This assumes that the float is +// already normalised such that the MSB is bit 30, _not_ bit 31. +static inline s32 vfp_single_pack(vfp_single* s) { - u32 val; - val = (s->sign << 16) + - (s->exponent << VFP_SINGLE_MANTISSA_BITS) + - (s->significand >> VFP_SINGLE_LOW_BITS); - return (s32)val; + u32 val = (s->sign << 16) + + (s->exponent << VFP_SINGLE_MANTISSA_BITS) + + (s->significand >> VFP_SINGLE_LOW_BITS); + return (s32)val; } -#define VFP_NUMBER (1<<0) -#define VFP_ZERO (1<<1) -#define VFP_DENORMAL (1<<2) -#define VFP_INFINITY (1<<3) -#define VFP_NAN (1<<4) -#define VFP_NAN_SIGNAL (1<<5) +enum : u32 { + VFP_NUMBER = (1 << 0), + VFP_ZERO = (1 << 1), + VFP_DENORMAL = (1 << 2), + VFP_INFINITY = (1 << 3), + VFP_NAN = (1 << 4), + VFP_NAN_SIGNAL = (1 << 5), -#define VFP_QNAN (VFP_NAN) -#define VFP_SNAN (VFP_NAN|VFP_NAN_SIGNAL) + VFP_QNAN = (VFP_NAN), + VFP_SNAN = (VFP_NAN|VFP_NAN_SIGNAL) +}; -static inline int vfp_single_type(struct vfp_single *s) +static inline int vfp_single_type(vfp_single* s) { - int type = VFP_NUMBER; - if (s->exponent == 255) { - if (s->significand == 0) - type = VFP_INFINITY; - else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) - type = VFP_QNAN; - else - type = VFP_SNAN; - } else if (s->exponent == 0) { - if (s->significand == 0) - type |= VFP_ZERO; - else - type |= VFP_DENORMAL; - } - return type; + int type = VFP_NUMBER; + if (s->exponent == 255) { + if (s->significand == 0) + type = VFP_INFINITY; + else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN) + type = VFP_QNAN; + else + type = VFP_SNAN; + } else if (s->exponent == 0) { + if (s->significand == 0) + type |= VFP_ZERO; + else + type |= VFP_DENORMAL; + } + return type; } -u32 vfp_single_normaliseround(ARMul_State* state, int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func); +u32 vfp_single_normaliseround(ARMul_State* state, int sd, vfp_single* vs, u32 fpscr, u32 exceptions, const char* func); -/* - * Double-precision - */ +// Double-precision struct vfp_double { - s16 exponent; - u16 sign; - u64 significand; + s16 exponent; + u16 sign; + u64 significand; }; -/* - * VFP_REG_ZERO is a special register number for vfp_get_double - * which returns (double)0.0. This is useful for the compare with - * zero instructions. - */ +// VFP_REG_ZERO is a special register number for vfp_get_double +// which returns (double)0.0. This is useful for the compare with +// zero instructions. #ifdef CONFIG_VFPv3 -#define VFP_REG_ZERO 32 +#define VFP_REG_ZERO 32 #else -#define VFP_REG_ZERO 16 +#define VFP_REG_ZERO 16 #endif -#define VFP_DOUBLE_MANTISSA_BITS (52) -#define VFP_DOUBLE_EXPONENT_BITS (11) -#define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) -#define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) - -/* - * The bit in an unpacked double which indicates that it is a quiet NaN - */ -#define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) - -/* - * Operations on packed single-precision numbers - */ -#define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) -#define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) -#define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) -#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) -#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) - -/* - * Unpack a double-precision float. Note that this returns the magnitude - * of the double-precision float mantissa with the 1. if necessary, - * aligned to bit 62. - */ -static inline void vfp_double_unpack(struct vfp_double *s, s64 val) +#define VFP_DOUBLE_MANTISSA_BITS (52) +#define VFP_DOUBLE_EXPONENT_BITS (11) +#define VFP_DOUBLE_LOW_BITS (64 - VFP_DOUBLE_MANTISSA_BITS - 2) +#define VFP_DOUBLE_LOW_BITS_MASK ((1 << VFP_DOUBLE_LOW_BITS) - 1) + +// The bit in an unpacked double which indicates that it is a quiet NaN +#define VFP_DOUBLE_SIGNIFICAND_QNAN (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1 + VFP_DOUBLE_LOW_BITS)) + +// Operations on packed single-precision numbers +#define vfp_double_packed_sign(v) ((v) & (1ULL << 63)) +#define vfp_double_packed_negate(v) ((v) ^ (1ULL << 63)) +#define vfp_double_packed_abs(v) ((v) & ~(1ULL << 63)) +#define vfp_double_packed_exponent(v) (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1)) +#define vfp_double_packed_mantissa(v) ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1)) + +// Unpack a double-precision float. Note that this returns the magnitude +// of the double-precision float mantissa with the 1. if necessary, +// aligned to bit 62. +static inline void vfp_double_unpack(vfp_double* s, s64 val) { - u64 significand; + u64 significand; - s->sign = vfp_double_packed_sign(val) >> 48; - s->exponent = vfp_double_packed_exponent(val); + s->sign = vfp_double_packed_sign(val) >> 48; + s->exponent = vfp_double_packed_exponent(val); - significand = (u64) val; - significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; - if (s->exponent && s->exponent != 2047) - significand |= (1ULL << 62); - s->significand = significand; + significand = (u64) val; + significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2; + if (s->exponent && s->exponent != 2047) + significand |= (1ULL << 62); + s->significand = significand; } -/* - * Re-pack a double-precision float. This assumes that the float is - * already normalised such that the MSB is bit 30, _not_ bit 31. - */ -static inline s64 vfp_double_pack(struct vfp_double *s) +// Re-pack a double-precision float. This assumes that the float is +// already normalised such that the MSB is bit 30, _not_ bit 31. +static inline s64 vfp_double_pack(vfp_double* s) { - u64 val; - val = ((u64)s->sign << 48) + - ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + - (s->significand >> VFP_DOUBLE_LOW_BITS); - return (s64)val; + u64 val = ((u64)s->sign << 48) + + ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) + + (s->significand >> VFP_DOUBLE_LOW_BITS); + return (s64)val; } -static inline int vfp_double_type(struct vfp_double *s) +static inline int vfp_double_type(vfp_double* s) { - int type = VFP_NUMBER; - if (s->exponent == 2047) { - if (s->significand == 0) - type = VFP_INFINITY; - else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) - type = VFP_QNAN; - else - type = VFP_SNAN; - } else if (s->exponent == 0) { - if (s->significand == 0) - type |= VFP_ZERO; - else - type |= VFP_DENORMAL; - } - return type; + int type = VFP_NUMBER; + if (s->exponent == 2047) { + if (s->significand == 0) + type = VFP_INFINITY; + else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN) + type = VFP_QNAN; + else + type = VFP_SNAN; + } else if (s->exponent == 0) { + if (s->significand == 0) + type |= VFP_ZERO; + else + type |= VFP_DENORMAL; + } + return type; } -u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); - u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand); -/* - * A special flag to tell the normalisation code not to normalise. - */ -#define VFP_NAN_FLAG 0x100 - -/* - * A bit pattern used to indicate the initial (unset) value of the - * exception mask, in case nothing handles an instruction. This - * doesn't include the NAN flag, which get masked out before - * we check for an error. - */ -#define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) - -/* - * A flag to tell vfp instruction type. - * OP_SCALAR - this operation always operates in scalar mode - * OP_SD - the instruction exceptionally writes to a single precision result. - * OP_DD - the instruction exceptionally writes to a double precision result. - * OP_SM - the instruction exceptionally reads from a single precision operand. - */ -#define OP_SCALAR (1 << 0) -#define OP_SD (1 << 1) -#define OP_DD (1 << 1) -#define OP_SM (1 << 2) +// A special flag to tell the normalisation code not to normalise. +#define VFP_NAN_FLAG 0x100 + +// A bit pattern used to indicate the initial (unset) value of the +// exception mask, in case nothing handles an instruction. This +// doesn't include the NAN flag, which get masked out before +// we check for an error. +#define VFP_EXCEPTION_ERROR ((u32)-1 & ~VFP_NAN_FLAG) + +// A flag to tell vfp instruction type. +// OP_SCALAR - This operation always operates in scalar mode +// OP_SD - The instruction exceptionally writes to a single precision result. +// OP_DD - The instruction exceptionally writes to a double precision result. +// OP_SM - The instruction exceptionally reads from a single precision operand. +enum : u32 { + OP_SCALAR = (1 << 0), + OP_SD = (1 << 1), + OP_DD = (1 << 1), + OP_SM = (1 << 2) +}; struct op { - u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); - u32 flags; + u32 (* const fn)(ARMul_State* state, int dd, int dn, int dm, u32 fpscr); + u32 flags; }; static inline u32 fls(ARMword x) { - int r = 32; - - if (!x) - return 0; - if (!(x & 0xffff0000u)) { - x <<= 16; - r -= 16; - } - if (!(x & 0xff000000u)) { - x <<= 8; - r -= 8; - } - if (!(x & 0xf0000000u)) { - x <<= 4; - r -= 4; - } - if (!(x & 0xc0000000u)) { - x <<= 2; - r -= 2; - } - if (!(x & 0x80000000u)) { - x <<= 1; - r -= 1; - } - return r; + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; } -u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func); -u32 vfp_double_multiply(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr); -u32 vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn, struct vfp_double *vdm, u32 fpscr); -u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr); +u32 vfp_double_multiply(vfp_double* vdd, vfp_double* vdn, vfp_double* vdm, u32 fpscr); +u32 vfp_double_add(vfp_double* vdd, vfp_double* vdn, vfp_double *vdm, u32 fpscr); +u32 vfp_double_normaliseround(ARMul_State* state, int dd, vfp_double* vd, u32 fpscr, u32 exceptions, const char* func); diff --git a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp index d35ca510a..2c15db12b 100644 --- a/src/core/arm/skyeye_common/vfp/vfpdouble.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpdouble.cpp @@ -83,134 +83,6 @@ static void vfp_double_normalise_denormal(struct vfp_double *vd) vfp_double_dump("normalise_denormal: out", vd); } -u32 vfp_double_normaliseroundintern(ARMul_State* state, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) -{ - u64 significand, incr; - int exponent, shift, underflow; - u32 rmode; - - vfp_double_dump("pack: in", vd); - - /* - * Infinities and NaNs are a special case. - */ - if (vd->exponent == 2047 && (vd->significand == 0 || exceptions)) - goto pack; - - /* - * Special-case zero. - */ - if (vd->significand == 0) { - vd->exponent = 0; - goto pack; - } - - exponent = vd->exponent; - significand = vd->significand; - - shift = 32 - fls((ARMword)(significand >> 32)); - if (shift == 32) - shift = 64 - fls((ARMword)significand); - if (shift) { - exponent -= shift; - significand <<= shift; - } - -#if 1 - vd->exponent = exponent; - vd->significand = significand; - vfp_double_dump("pack: normalised", vd); -#endif - - /* - * Tiny number? - */ - underflow = exponent < 0; - if (underflow) { - significand = vfp_shiftright64jamming(significand, -exponent); - exponent = 0; -#if 1 - vd->exponent = exponent; - vd->significand = significand; - vfp_double_dump("pack: tiny number", vd); -#endif - if (!(significand & ((1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1))) - underflow = 0; - } - - /* - * Select rounding increment. - */ - incr = 0; - rmode = fpscr & FPSCR_RMODE_MASK; - - if (rmode == FPSCR_ROUND_NEAREST) { - incr = 1ULL << VFP_DOUBLE_LOW_BITS; - if ((significand & (1ULL << (VFP_DOUBLE_LOW_BITS + 1))) == 0) - incr -= 1; - } - else if (rmode == FPSCR_ROUND_TOZERO) { - incr = 0; - } - else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vd->sign != 0)) - incr = (1ULL << (VFP_DOUBLE_LOW_BITS + 1)) - 1; - - LOG_TRACE(Core_ARM11, "VFP: rounding increment = 0x%08llx\n", incr); - - /* - * Is our rounding going to overflow? - */ - if ((significand + incr) < significand) { - exponent += 1; - significand = (significand >> 1) | (significand & 1); - incr >>= 1; -#if 1 - vd->exponent = exponent; - vd->significand = significand; - vfp_double_dump("pack: overflow", vd); -#endif - } - - /* - * If any of the low bits (which will be shifted out of the - * number) are non-zero, the result is inexact. - */ - if (significand & ((1 << (VFP_DOUBLE_LOW_BITS + 1)) - 1)) - exceptions |= FPSCR_IXC; - - /* - * Do our rounding. - */ - significand += incr; - - /* - * Infinity? - */ - if (exponent >= 2046) { - exceptions |= FPSCR_OFC | FPSCR_IXC; - if (incr == 0) { - vd->exponent = 2045; - vd->significand = 0x7fffffffffffffffULL; - } - else { - vd->exponent = 2047; /* infinity */ - vd->significand = 0; - } - } - else { - if (significand >> (VFP_DOUBLE_LOW_BITS + 1) == 0) - exponent = 0; - if (exponent || significand > 0x8000000000000000ULL) - underflow = 0; - if (underflow) - exceptions |= FPSCR_UFC; - vd->exponent = exponent; - vd->significand = significand >> 1; - } - pack: - return 0; -} - u32 vfp_double_normaliseround(ARMul_State* state, int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func) { u64 significand, incr; @@ -511,7 +383,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u LOG_TRACE(Core_ARM11, "In %s, state=0x%x, fpscr=0x%x\n", __FUNCTION__, state, fpscr); m = vfp_get_double(state, dm); if (vfp_double_packed_exponent(m) == 2047 && vfp_double_packed_mantissa(m)) { - ret |= FPSCR_C | FPSCR_V; + ret |= FPSCR_CFLAG | FPSCR_VFLAG; if (signal_on_qnan || !(vfp_double_packed_mantissa(m) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) /* * Signalling NaN, or signalling on quiet NaN @@ -521,7 +393,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u d = vfp_get_double(state, dd); if (vfp_double_packed_exponent(d) == 2047 && vfp_double_packed_mantissa(d)) { - ret |= FPSCR_C | FPSCR_V; + ret |= FPSCR_CFLAG | FPSCR_VFLAG; if (signal_on_qnan || !(vfp_double_packed_mantissa(d) & (1ULL << (VFP_DOUBLE_MANTISSA_BITS - 1)))) /* * Signalling NaN, or signalling on quiet NaN @@ -535,7 +407,7 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u /* * equal */ - ret |= FPSCR_Z | FPSCR_C; + ret |= FPSCR_ZFLAG | FPSCR_CFLAG; //printf("In %s,1 ret=0x%x\n", __FUNCTION__, ret); } else if (vfp_double_packed_sign(d ^ m)) { /* @@ -545,22 +417,22 @@ static u32 vfp_compare(ARMul_State* state, int dd, int signal_on_qnan, int dm, u /* * d is negative, so d < m */ - ret |= FPSCR_N; + ret |= FPSCR_NFLAG; else /* * d is positive, so d > m */ - ret |= FPSCR_C; + ret |= FPSCR_CFLAG; } else if ((vfp_double_packed_sign(d) != 0) ^ (d < m)) { /* * d < m */ - ret |= FPSCR_N; + ret |= FPSCR_NFLAG; } else if ((vfp_double_packed_sign(d) != 0) ^ (d > m)) { /* * d > m */ - ret |= FPSCR_C; + ret |= FPSCR_CFLAG; } } LOG_TRACE(Core_ARM11, "In %s, state=0x%x, ret=0x%x\n", __FUNCTION__, state, ret); @@ -592,49 +464,6 @@ static u32 vfp_double_fcmpez(ARMul_State* state, int dd, int unused, int dm, u32 return vfp_compare(state, dd, 1, VFP_REG_ZERO, fpscr); } -u32 vfp_double_fcvtsinterncutting(ARMul_State* state, int sd, struct vfp_double* dm, u32 fpscr) //ichfly for internal use only -{ - struct vfp_single vsd; - int tm; - u32 exceptions = 0; - - LOG_TRACE(Core_ARM11, "In %s\n", __FUNCTION__); - - tm = vfp_double_type(dm); - - /* - * If we have a signalling NaN, signal invalid operation. - */ - if (tm == VFP_SNAN) - exceptions = FPSCR_IOC; - - if (tm & VFP_DENORMAL) - vfp_double_normalise_denormal(dm); - - vsd.sign = dm->sign; - vsd.significand = vfp_hi64to32jamming(dm->significand); - - /* - * If we have an infinity or a NaN, the exponent must be 255 - */ - if (tm & (VFP_INFINITY | VFP_NAN)) { - vsd.exponent = 255; - if (tm == VFP_QNAN) - vsd.significand |= VFP_SINGLE_SIGNIFICAND_QNAN; - goto pack_nan; - } - else if (tm & VFP_ZERO) - vsd.exponent = 0; - else - vsd.exponent = dm->exponent - (1023 - 127); - - return vfp_single_normaliseround(state, sd, &vsd, fpscr, exceptions, "fcvts"); - -pack_nan: - vfp_put_float(state, vfp_single_pack(&vsd), sd); - return exceptions; -} - static u32 vfp_double_fcvts(ARMul_State* state, int sd, int unused, int dm, u32 fpscr) { struct vfp_double vdm; @@ -723,7 +552,7 @@ static u32 vfp_double_ftoui(ARMul_State* state, int sd, int unused, int dm, u32 exceptions |= FPSCR_IDC; if (tm & VFP_NAN) - vdm.sign = 0; + vdm.sign = 1; if (vdm.exponent >= 1023 + 32) { d = vdm.sign ? 0 : 0xffffffff; diff --git a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp index b7872bdc4..678b63f51 100644 --- a/src/core/arm/skyeye_common/vfp/vfpsingle.cpp +++ b/src/core/arm/skyeye_common/vfp/vfpsingle.cpp @@ -419,7 +419,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 d = vfp_get_float(state, sd); if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) { - ret |= FPSCR_C | FPSCR_V; + ret |= FPSCR_CFLAG | FPSCR_VFLAG; if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) /* * Signalling NaN, or signalling on quiet NaN @@ -428,7 +428,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 } if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) { - ret |= FPSCR_C | FPSCR_V; + ret |= FPSCR_CFLAG | FPSCR_VFLAG; if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1)))) /* * Signalling NaN, or signalling on quiet NaN @@ -441,7 +441,7 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 /* * equal */ - ret |= FPSCR_Z | FPSCR_C; + ret |= FPSCR_ZFLAG | FPSCR_CFLAG; } else if (vfp_single_packed_sign(d ^ m)) { /* * different signs @@ -450,22 +450,22 @@ static u32 vfp_compare(ARMul_State* state, int sd, int signal_on_qnan, s32 m, u3 /* * d is negative, so d < m */ - ret |= FPSCR_N; + ret |= FPSCR_NFLAG; else /* * d is positive, so d > m */ - ret |= FPSCR_C; + ret |= FPSCR_CFLAG; } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) { /* * d < m */ - ret |= FPSCR_N; + ret |= FPSCR_NFLAG; } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) { /* * d > m */ - ret |= FPSCR_C; + ret |= FPSCR_CFLAG; } } return ret; @@ -491,46 +491,6 @@ static u32 vfp_single_fcmpez(ARMul_State* state, int sd, int unused, s32 m, u32 return vfp_compare(state, sd, 1, 0, fpscr); } -static s64 vfp_single_to_doubleintern(ARMul_State* state, s32 m, u32 fpscr) //ichfly for internal use only -{ - struct vfp_single vsm; - struct vfp_double vdd; - int tm; - u32 exceptions = 0; - - vfp_single_unpack(&vsm, m); - - tm = vfp_single_type(&vsm); - - /* - * If we have a signalling NaN, signal invalid operation. - */ - if (tm == VFP_SNAN) - exceptions = FPSCR_IOC; - - if (tm & VFP_DENORMAL) - vfp_single_normalise_denormal(&vsm); - - vdd.sign = vsm.sign; - vdd.significand = (u64)vsm.significand << 32; - - /* - * If we have an infinity or NaN, the exponent must be 2047. - */ - if (tm & (VFP_INFINITY | VFP_NAN)) { - vdd.exponent = 2047; - if (tm == VFP_QNAN) - vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN; - goto pack_nan; - } else if (tm & VFP_ZERO) - vdd.exponent = 0; - else - vdd.exponent = vsm.exponent + (1023 - 127); -pack_nan: - vfp_double_normaliseroundintern(state, &vdd, fpscr, exceptions, "fcvtd"); - return vfp_double_pack(&vdd); -} - static u32 vfp_single_fcvtd(ARMul_State* state, int dd, int unused, s32 m, u32 fpscr) { struct vfp_single vsm; diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 52dca4dd8..a2ffbcdb7 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -153,12 +153,8 @@ void Shutdown() { * @return True on success, otherwise false */ bool LoadExec(u32 entry_point) { - Core::g_app_core->SetPC(entry_point); - // 0x30 is the typical main thread priority I've seen used so far - g_main_thread = Kernel::SetupMainThread(0x30, Kernel::DEFAULT_STACK_SIZE); - // Setup the idle thread - Kernel::SetupIdleThread(); + g_main_thread = Kernel::SetupMainThread(Kernel::DEFAULT_STACK_SIZE, entry_point, 0x30); return true; } diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 9f7166ca4..a811db392 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -21,7 +21,7 @@ namespace Kernel { */ static void ResumeWaitingThread(Mutex* mutex) { // Reset mutex lock thread handle, nothing is waiting - mutex->locked = false; + mutex->lock_count = 0; mutex->holding_thread = nullptr; // Find the next waiting thread for the mutex... @@ -44,8 +44,7 @@ Mutex::~Mutex() {} SharedPtr<Mutex> Mutex::Create(bool initial_locked, std::string name) { SharedPtr<Mutex> mutex(new Mutex); - mutex->initial_locked = initial_locked; - mutex->locked = false; + mutex->lock_count = 0; mutex->name = std::move(name); mutex->holding_thread = nullptr; @@ -57,7 +56,7 @@ SharedPtr<Mutex> Mutex::Create(bool initial_locked, std::string name) { } bool Mutex::ShouldWait() { - return locked && holding_thread != GetCurrentThread(); + return lock_count > 0 && holding_thread != GetCurrentThread();; } void Mutex::Acquire() { @@ -66,21 +65,27 @@ void Mutex::Acquire() { void Mutex::Acquire(SharedPtr<Thread> thread) { _assert_msg_(Kernel, !ShouldWait(), "object unavailable!"); - if (locked) - return; - locked = true; + // Actually "acquire" the mutex only if we don't already have it... + if (lock_count == 0) { + thread->held_mutexes.insert(this); + holding_thread = std::move(thread); + } - thread->held_mutexes.insert(this); - holding_thread = std::move(thread); + lock_count++; } void Mutex::Release() { - if (!locked) - return; - - holding_thread->held_mutexes.erase(this); - ResumeWaitingThread(this); + // Only release if the mutex is held... + if (lock_count > 0) { + lock_count--; + + // Yield to the next thread only if we've fully released the mutex... + if (lock_count == 0) { + holding_thread->held_mutexes.erase(this); + ResumeWaitingThread(this); + } + } } } // namespace diff --git a/src/core/hle/kernel/mutex.h b/src/core/hle/kernel/mutex.h index 548403614..d6d5328be 100644 --- a/src/core/hle/kernel/mutex.h +++ b/src/core/hle/kernel/mutex.h @@ -30,8 +30,7 @@ public: static const HandleType HANDLE_TYPE = HandleType::Mutex; HandleType GetHandleType() const override { return HANDLE_TYPE; } - bool initial_locked; ///< Initial lock state when mutex was created - bool locked; ///< Current locked state + int lock_count; ///< Number of times the mutex has been acquired std::string name; ///< Name of mutex (optional) SharedPtr<Thread> holding_thread; ///< Thread that has acquired the mutex diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 3987f9608..7f629c20e 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -21,8 +21,11 @@ namespace Kernel { +/// Event type for the thread wake up event +static int ThreadWakeupEventType = -1; + bool Thread::ShouldWait() { - return status != THREADSTATUS_DORMANT; + return status != THREADSTATUS_DEAD; } void Thread::Acquire() { @@ -33,12 +36,20 @@ void Thread::Acquire() { static std::vector<SharedPtr<Thread>> thread_list; // Lists only ready thread ids. -static Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST+1> thread_ready_queue; +static Common::ThreadQueueList<Thread*, THREADPRIO_LOWEST+1> ready_queue; static Thread* current_thread; -static const u32 INITIAL_THREAD_ID = 1; ///< The first available thread id at startup -static u32 next_thread_id; ///< The next available thread id +// The first available thread id at startup +static u32 next_thread_id = 1; + +/** + * Creates a new thread ID + * @return The new thread ID + */ +inline static u32 const NewThreadId() { + return next_thread_id++; +} Thread::Thread() {} Thread::~Thread() {} @@ -47,86 +58,53 @@ Thread* GetCurrentThread() { return current_thread; } -/// Resets a thread -static void ResetThread(Thread* t, u32 arg, s32 lowest_priority) { - memset(&t->context, 0, sizeof(Core::ThreadContext)); - - t->context.cpu_registers[0] = arg; - t->context.pc = t->entry_point; - t->context.sp = t->stack_top; - t->context.cpsr = 0x1F; // Usermode - - // TODO(bunnei): This instructs the CPU core to start the execution as if it is "resuming" a - // thread. This is somewhat Sky-Eye specific, and should be re-architected in the future to be - // agnostic of the CPU core. - t->context.mode = 8; - - if (t->current_priority < lowest_priority) { - t->current_priority = t->initial_priority; - } - - t->wait_objects.clear(); - t->wait_address = 0; -} - -/// Change a thread to "ready" state -static void ChangeReadyState(Thread* t, bool ready) { - if (t->IsReady()) { - if (!ready) { - thread_ready_queue.remove(t->current_priority, t); - } - } else if (ready) { - if (t->IsRunning()) { - thread_ready_queue.push_front(t->current_priority, t); - } else { - thread_ready_queue.push_back(t->current_priority, t); - } - t->status = THREADSTATUS_READY; - } -} - -/// Check if a thread is waiting on a the specified wait object +/** + * Check if a thread is waiting on the specified wait object + * @param thread The thread to test + * @param wait_object The object to test against + * @return True if the thread is waiting, false otherwise + */ static bool CheckWait_WaitObject(const Thread* thread, WaitObject* wait_object) { - auto itr = std::find(thread->wait_objects.begin(), thread->wait_objects.end(), wait_object); + if (thread->status != THREADSTATUS_WAIT_SYNCH) + return false; - if (itr != thread->wait_objects.end()) - return thread->IsWaiting(); - - return false; + auto itr = std::find(thread->wait_objects.begin(), thread->wait_objects.end(), wait_object); + return itr != thread->wait_objects.end(); } -/// Check if the specified thread is waiting on the specified address to be arbitrated +/** + * Check if the specified thread is waiting on the specified address to be arbitrated + * @param thread The thread to test + * @param wait_address The address to test against + * @return True if the thread is waiting, false otherwise + */ static bool CheckWait_AddressArbiter(const Thread* thread, VAddr wait_address) { - return thread->IsWaiting() && thread->wait_objects.empty() && wait_address == thread->wait_address; + return thread->status == THREADSTATUS_WAIT_ARB && wait_address == thread->wait_address; } -/// Stops the current thread -void Thread::Stop(const char* reason) { +void Thread::Stop() { // Release all the mutexes that this thread holds ReleaseThreadMutexes(this); - ChangeReadyState(this, false); - status = THREADSTATUS_DORMANT; + // Cancel any outstanding wakeup events for this thread + CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); + + // Clean up thread from ready queue + // This is only needed when the thread is termintated forcefully (SVC TerminateProcess) + if (status == THREADSTATUS_READY){ + ready_queue.remove(current_priority, this); + } + + status = THREADSTATUS_DEAD; + WakeupAllWaitingThreads(); - // Stopped threads are never waiting. + // Clean up any dangling references in objects that this thread was waiting for for (auto& wait_object : wait_objects) { wait_object->RemoveWaitingThread(this); } - wait_objects.clear(); - wait_address = 0; -} - -/// Changes a threads state -static void ChangeThreadState(Thread* t, ThreadStatus new_status) { - if (!t || t->status == new_status) { - return; - } - ChangeReadyState(t, (new_status & THREADSTATUS_READY) != 0); - t->status = new_status; } -/// Arbitrate the highest priority thread that is waiting Thread* ArbitrateHighestPriorityThread(u32 address) { Thread* highest_priority_thread = nullptr; s32 priority = THREADPRIO_LOWEST; @@ -153,108 +131,113 @@ Thread* ArbitrateHighestPriorityThread(u32 address) { return highest_priority_thread; } -/// Arbitrate all threads currently waiting void ArbitrateAllThreads(u32 address) { - - // Iterate through threads, find highest priority thread that is waiting to be arbitrated... + // Resume all threads found to be waiting on the address for (auto& thread : thread_list) { if (CheckWait_AddressArbiter(thread.get(), address)) thread->ResumeFromWait(); } } -/// Calls a thread by marking it as "ready" (note: will not actually execute until current thread yields) -static void CallThread(Thread* t) { - // Stop waiting - ChangeThreadState(t, THREADSTATUS_READY); -} +/** + * Switches the CPU's active thread context to that of the specified thread + * @param new_thread The thread to switch to + */ +static void SwitchContext(Thread* new_thread) { + _dbg_assert_msg_(Kernel, new_thread->status == THREADSTATUS_READY, "Thread must be ready to become running."); -/// Switches CPU context to that of the specified thread -static void SwitchContext(Thread* t) { - Thread* cur = GetCurrentThread(); + Thread* previous_thread = GetCurrentThread(); - // Save context for current thread - if (cur) { - Core::g_app_core->SaveContext(cur->context); + // Save context for previous thread + if (previous_thread) { + Core::g_app_core->SaveContext(previous_thread->context); - if (cur->IsRunning()) { - ChangeReadyState(cur, true); + if (previous_thread->status == THREADSTATUS_RUNNING) { + // This is only the case when a reschedule is triggered without the current thread + // yielding execution (i.e. an event triggered, system core time-sliced, etc) + ready_queue.push_front(previous_thread->current_priority, previous_thread); + previous_thread->status = THREADSTATUS_READY; } } + // Load context of new thread - if (t) { - current_thread = t; - ChangeReadyState(t, false); - t->status = (t->status | THREADSTATUS_RUNNING) & ~THREADSTATUS_READY; - Core::g_app_core->LoadContext(t->context); + if (new_thread) { + current_thread = new_thread; + + ready_queue.remove(new_thread->current_priority, new_thread); + new_thread->status = THREADSTATUS_RUNNING; + + Core::g_app_core->LoadContext(new_thread->context); } else { current_thread = nullptr; } } -/// Gets the next thread that is ready to be run by priority -static Thread* NextThread() { +/** + * Pops and returns the next thread from the thread queue + * @return A pointer to the next ready thread + */ +static Thread* PopNextReadyThread() { Thread* next; - Thread* cur = GetCurrentThread(); + Thread* thread = GetCurrentThread(); - if (cur && cur->IsRunning()) { - next = thread_ready_queue.pop_first_better(cur->current_priority); + if (thread && thread->status == THREADSTATUS_RUNNING) { + // We have to do better than the current thread. + // This call returns null when that's not possible. + next = ready_queue.pop_first_better(thread->current_priority); } else { - next = thread_ready_queue.pop_first(); - } - if (next == 0) { - return nullptr; + next = ready_queue.pop_first(); } + return next; } void WaitCurrentThread_Sleep() { Thread* thread = GetCurrentThread(); - ChangeThreadState(thread, ThreadStatus(THREADSTATUS_WAIT | (thread->status & THREADSTATUS_SUSPEND))); + thread->status = THREADSTATUS_WAIT_SLEEP; } -void WaitCurrentThread_WaitSynchronization(SharedPtr<WaitObject> wait_object, bool wait_set_output, bool wait_all) { +void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects, bool wait_set_output, bool wait_all) { Thread* thread = GetCurrentThread(); thread->wait_set_output = wait_set_output; thread->wait_all = wait_all; - - // It's possible to call WaitSynchronizationN without any objects passed in... - if (wait_object != nullptr) - thread->wait_objects.push_back(wait_object); - - ChangeThreadState(thread, ThreadStatus(THREADSTATUS_WAIT | (thread->status & THREADSTATUS_SUSPEND))); + thread->wait_objects = std::move(wait_objects); + thread->status = THREADSTATUS_WAIT_SYNCH; } void WaitCurrentThread_ArbitrateAddress(VAddr wait_address) { Thread* thread = GetCurrentThread(); thread->wait_address = wait_address; - ChangeThreadState(thread, ThreadStatus(THREADSTATUS_WAIT | (thread->status & THREADSTATUS_SUSPEND))); + thread->status = THREADSTATUS_WAIT_ARB; } -/// Event type for the thread wake up event -static int ThreadWakeupEventType = -1; // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, allowing // us to simply use a pool index or similar. static Kernel::HandleTable wakeup_callback_handle_table; -/// Callback that will wake up the thread it was scheduled for +/** + * Callback that will wake up the thread it was scheduled for + * @param thread_handle The handle of the thread that's been awoken + * @param cycles_late The number of CPU cycles that have passed since the desired wakeup time + */ static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) { SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>((Handle)thread_handle); if (thread == nullptr) { - LOG_CRITICAL(Kernel, "Callback fired for invalid thread %08X", thread_handle); + LOG_CRITICAL(Kernel, "Callback fired for invalid thread %08X", (Handle)thread_handle); return; } - thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS, - ErrorSummary::StatusChanged, ErrorLevel::Info)); + if (thread->status == THREADSTATUS_WAIT_SYNCH) { + thread->SetWaitSynchronizationResult(ResultCode(ErrorDescription::Timeout, ErrorModule::OS, + ErrorSummary::StatusChanged, ErrorLevel::Info)); - if (thread->wait_set_output) - thread->SetWaitSynchronizationOutput(-1); + if (thread->wait_set_output) + thread->SetWaitSynchronizationOutput(-1); + } thread->ResumeFromWait(); } - void Thread::WakeAfterDelay(s64 nanoseconds) { // Don't schedule a wakeup if the thread wants to wait forever if (nanoseconds == -1) @@ -265,7 +248,7 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { } void Thread::ReleaseWaitObject(WaitObject* wait_object) { - if (wait_objects.empty()) { + if (status != THREADSTATUS_WAIT_SYNCH || wait_objects.empty()) { LOG_CRITICAL(Kernel, "thread is not waiting on any objects!"); return; } @@ -307,34 +290,48 @@ void Thread::ReleaseWaitObject(WaitObject* wait_object) { } void Thread::ResumeFromWait() { - // Cancel any outstanding wakeup events + // Cancel any outstanding wakeup events for this thread CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle); - status &= ~THREADSTATUS_WAIT; - - // Remove this thread from all other WaitObjects - for (auto wait_object : wait_objects) - wait_object->RemoveWaitingThread(this); - - wait_objects.clear(); - wait_set_output = false; - wait_all = false; - wait_address = 0; - - if (!(status & (THREADSTATUS_WAITSUSPEND | THREADSTATUS_DORMANT | THREADSTATUS_DEAD))) { - ChangeReadyState(this, true); + switch (status) { + case THREADSTATUS_WAIT_SYNCH: + // Remove this thread from all other WaitObjects + for (auto wait_object : wait_objects) + wait_object->RemoveWaitingThread(this); + break; + case THREADSTATUS_WAIT_ARB: + case THREADSTATUS_WAIT_SLEEP: + break; + case THREADSTATUS_RUNNING: + case THREADSTATUS_READY: + LOG_ERROR(Kernel, "Thread with object id %u has already resumed.", GetObjectId()); + _dbg_assert_(Kernel, false); + return; + case THREADSTATUS_DEAD: + // This should never happen, as threads must complete before being stopped. + LOG_CRITICAL(Kernel, "Thread with object id %u cannot be resumed because it's DEAD.", + GetObjectId()); + _dbg_assert_(Kernel, false); + return; } + + ready_queue.push_back(current_priority, this); + status = THREADSTATUS_READY; } -/// Prints the thread queue for debugging purposes +/** + * Prints the thread queue for debugging purposes + */ static void DebugThreadQueue() { Thread* thread = GetCurrentThread(); if (!thread) { - return; + LOG_DEBUG(Kernel, "Current: NO CURRENT THREAD"); + } else { + LOG_DEBUG(Kernel, "0x%02X %u (current)", thread->current_priority, GetCurrentThread()->GetObjectId()); } - LOG_DEBUG(Kernel, "0x%02X %u (current)", thread->current_priority, GetCurrentThread()->GetObjectId()); + for (auto& t : thread_list) { - s32 priority = thread_ready_queue.contains(t.get()); + s32 priority = ready_queue.contains(t.get()); if (priority != -1) { LOG_DEBUG(Kernel, "0x%02X %u", priority, t->GetObjectId()); } @@ -342,14 +339,7 @@ static void DebugThreadQueue() { } ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, s32 priority, - u32 arg, s32 processor_id, VAddr stack_top, u32 stack_size) { - if (stack_size < 0x200) { - LOG_ERROR(Kernel, "(name=%s): invalid stack_size=0x%08X", name.c_str(), stack_size); - // TODO: Verify error - return ResultCode(ErrorDescription::InvalidSize, ErrorModule::Kernel, - ErrorSummary::InvalidArgument, ErrorLevel::Permanent); - } - + u32 arg, s32 processor_id, VAddr stack_top) { if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { s32 new_priority = CLAMP(priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); LOG_WARNING(Kernel_SVC, "(name=%s): invalid priority=%d, clamping to %d", @@ -369,13 +359,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, SharedPtr<Thread> thread(new Thread); thread_list.push_back(thread); - thread_ready_queue.prepare(priority); + ready_queue.prepare(priority); - thread->thread_id = next_thread_id++; + thread->thread_id = NewThreadId(); thread->status = THREADSTATUS_DORMANT; thread->entry_point = entry_point; thread->stack_top = stack_top; - thread->stack_size = stack_size; thread->initial_priority = thread->current_priority = priority; thread->processor_id = processor_id; thread->wait_set_output = false; @@ -385,75 +374,74 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, thread->name = std::move(name); thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); - ResetThread(thread.get(), arg, 0); - CallThread(thread.get()); + // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used + // to initialize the context + Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg); + + ready_queue.push_back(thread->current_priority, thread.get()); + thread->status = THREADSTATUS_READY; return MakeResult<SharedPtr<Thread>>(std::move(thread)); } -/// Set the priority of the thread specified by handle -void Thread::SetPriority(s32 priority) { - // If priority is invalid, clamp to valid range - if (priority < THREADPRIO_HIGHEST || priority > THREADPRIO_LOWEST) { - s32 new_priority = CLAMP(priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); - LOG_WARNING(Kernel_SVC, "invalid priority=%d, clamping to %d", priority, new_priority); +// TODO(peachum): Remove this. Range checking should be done, and an appropriate error should be returned. +static void ClampPriority(const Thread* thread, s32* priority) { + if (*priority < THREADPRIO_HIGHEST || *priority > THREADPRIO_LOWEST) { + _dbg_assert_msg_(Kernel, false, "Application passed an out of range priority. An error should be returned."); + + s32 new_priority = CLAMP(*priority, THREADPRIO_HIGHEST, THREADPRIO_LOWEST); + LOG_WARNING(Kernel_SVC, "(name=%s): invalid priority=%d, clamping to %d", + thread->name.c_str(), *priority, new_priority); // TODO(bunnei): Clamping to a valid priority is not necessarily correct behavior... Confirm // validity of this - priority = new_priority; + *priority = new_priority; } +} - // Change thread priority - s32 old = current_priority; - thread_ready_queue.remove(old, this); - current_priority = priority; - thread_ready_queue.prepare(current_priority); +void Thread::SetPriority(s32 priority) { + ClampPriority(this, &priority); - // Change thread status to "ready" and push to ready queue - if (IsRunning()) { - status = (status & ~THREADSTATUS_RUNNING) | THREADSTATUS_READY; + if (current_priority == priority) { + return; } - if (IsReady()) { - thread_ready_queue.push_back(current_priority, this); + + if (status == THREADSTATUS_READY) { + // If thread was ready, adjust queues + ready_queue.remove(current_priority, this); + ready_queue.prepare(priority); + ready_queue.push_back(priority, this); } + + current_priority = priority; } SharedPtr<Thread> SetupIdleThread() { // We need to pass a few valid values to get around parameter checking in Thread::Create. auto thread = Thread::Create("idle", Memory::KERNEL_MEMORY_VADDR, THREADPRIO_LOWEST, 0, - THREADPROCESSORID_0, 0, Kernel::DEFAULT_STACK_SIZE).MoveFrom(); + THREADPROCESSORID_0, 0).MoveFrom(); thread->idle = true; - CallThread(thread.get()); return thread; } -SharedPtr<Thread> SetupMainThread(s32 priority, u32 stack_size) { +SharedPtr<Thread> SetupMainThread(u32 stack_size, u32 entry_point, s32 priority) { + _dbg_assert_(Kernel, !GetCurrentThread()); + // Initialize new "main" thread - auto thread_res = Thread::Create("main", Core::g_app_core->GetPC(), priority, 0, - THREADPROCESSORID_0, Memory::SCRATCHPAD_VADDR_END, stack_size); - // TODO(yuriks): Propagate error - _dbg_assert_(Kernel, thread_res.Succeeded()); - SharedPtr<Thread> thread = std::move(*thread_res); - - // If running another thread already, set it to "ready" state - Thread* cur = GetCurrentThread(); - if (cur && cur->IsRunning()) { - ChangeReadyState(cur, true); - } + auto thread_res = Thread::Create("main", entry_point, priority, 0, + THREADPROCESSORID_0, Memory::SCRATCHPAD_VADDR_END); + + SharedPtr<Thread> thread = thread_res.MoveFrom(); // Run new "main" thread - current_thread = thread.get(); - thread->status = THREADSTATUS_RUNNING; - Core::g_app_core->LoadContext(thread->context); + SwitchContext(thread.get()); return thread; } - -/// Reschedules to the next available thread (call after current thread is suspended) void Reschedule() { Thread* prev = GetCurrentThread(); - Thread* next = NextThread(); + Thread* next = PopNextReadyThread(); HLE::g_reschedule = false; if (next != nullptr) { @@ -480,8 +468,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { //////////////////////////////////////////////////////////////////////////////////////////////////// void ThreadingInit() { - next_thread_id = INITIAL_THREAD_ID; ThreadWakeupEventType = CoreTiming::RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); + + // Setup the idle thread + SetupIdleThread(); } void ThreadingShutdown() { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 633bb7c98..cfd073a70 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -31,13 +31,13 @@ enum ThreadProcessorId { }; enum ThreadStatus { - THREADSTATUS_RUNNING = 1, - THREADSTATUS_READY = 2, - THREADSTATUS_WAIT = 4, - THREADSTATUS_SUSPEND = 8, - THREADSTATUS_DORMANT = 16, - THREADSTATUS_DEAD = 32, - THREADSTATUS_WAITSUSPEND = THREADSTATUS_WAIT | THREADSTATUS_SUSPEND + THREADSTATUS_RUNNING, ///< Currently running + THREADSTATUS_READY, ///< Ready to run + THREADSTATUS_WAIT_ARB, ///< Waiting on an address arbiter + THREADSTATUS_WAIT_SLEEP, ///< Waiting due to a SleepThread SVC + THREADSTATUS_WAIT_SYNCH, ///< Waiting due to a WaitSynchronization SVC + THREADSTATUS_DORMANT, ///< Created but not yet made ready + THREADSTATUS_DEAD ///< Run to completion, or forcefully terminated }; namespace Kernel { @@ -46,8 +46,19 @@ class Mutex; class Thread final : public WaitObject { public: + /** + * Creates and returns a new thread. The new thread is immediately scheduled + * @param name The friendly name desired for the thread + * @param entry_point The address at which the thread should start execution + * @param priority The thread's priority + * @param arg User data to pass to the thread + * @param processor_id The ID(s) of the processors on which the thread is desired to be run + * @param stack_top The address of the thread's stack top + * @param stack_size The size of the thread's stack + * @return A shared pointer to the newly created thread + */ static ResultVal<SharedPtr<Thread>> Create(std::string name, VAddr entry_point, s32 priority, - u32 arg, s32 processor_id, VAddr stack_top, u32 stack_size); + u32 arg, s32 processor_id, VAddr stack_top); std::string GetName() const override { return name; } std::string GetTypeName() const override { return "Thread"; } @@ -55,22 +66,32 @@ public: static const HandleType HANDLE_TYPE = HandleType::Thread; HandleType GetHandleType() const override { return HANDLE_TYPE; } - inline bool IsRunning() const { return (status & THREADSTATUS_RUNNING) != 0; } - inline bool IsStopped() const { return (status & THREADSTATUS_DORMANT) != 0; } - inline bool IsReady() const { return (status & THREADSTATUS_READY) != 0; } - inline bool IsWaiting() const { return (status & THREADSTATUS_WAIT) != 0; } - inline bool IsSuspended() const { return (status & THREADSTATUS_SUSPEND) != 0; } - inline bool IsIdle() const { return idle; } - bool ShouldWait() override; void Acquire() override; + /** + * Checks if the thread is an idle (stub) thread + * @return True if the thread is an idle (stub) thread, false otherwise + */ + inline bool IsIdle() const { return idle; } + + /** + * Gets the thread's current priority + * @return The current thread's priority + */ s32 GetPriority() const { return current_priority; } + + /** + * Sets the thread's current priority + * @param priority The new priority + */ void SetPriority(s32 priority); + /** + * Gets the thread's thread ID + * @return The thread's ID + */ u32 GetThreadId() const { return thread_id; } - - void Stop(const char* reason); /** * Release an acquired wait object @@ -78,12 +99,14 @@ public: */ void ReleaseWaitObject(WaitObject* wait_object); - /// Resumes a thread from waiting by marking it as "ready" + /** + * Resumes a thread from waiting + */ void ResumeFromWait(); /** - * Schedules an event to wake up the specified thread after the specified delay. - * @param nanoseconds The time this thread will be allowed to sleep for. + * Schedules an event to wake up the specified thread after the specified delay + * @param nanoseconds The time this thread will be allowed to sleep for */ void WakeAfterDelay(s64 nanoseconds); @@ -99,6 +122,11 @@ public: */ void SetWaitSynchronizationOutput(s32 output); + /** + * Stops a thread, invalidating it from further use + */ + void Stop(); + Core::ThreadContext context; u32 thread_id; @@ -106,7 +134,6 @@ public: u32 status; u32 entry_point; u32 stack_top; - u32 stack_size; s32 initial_priority; s32 current_priority; @@ -136,31 +163,49 @@ private: extern SharedPtr<Thread> g_main_thread; -/// Sets up the primary application thread -SharedPtr<Thread> SetupMainThread(s32 priority, u32 stack_size); +/** + * Sets up the primary application thread + * @param stack_size The size of the thread's stack + * @param entry_point The address at which the thread should start execution + * @param priority The priority to give the main thread + * @return A shared pointer to the main thread + */ +SharedPtr<Thread> SetupMainThread(u32 stack_size, u32 entry_point, s32 priority); -/// Reschedules to the next available thread (call after current thread is suspended) +/** + * Reschedules to the next available thread (call after current thread is suspended) + */ void Reschedule(); -/// Arbitrate the highest priority thread that is waiting +/** + * Arbitrate the highest priority thread that is waiting + * @param address The address for which waiting threads should be arbitrated + */ Thread* ArbitrateHighestPriorityThread(u32 address); -/// Arbitrate all threads currently waiting... +/** + * Arbitrate all threads currently waiting. + * @param address The address for which waiting threads should be arbitrated + */ void ArbitrateAllThreads(u32 address); -/// Gets the current thread +/** + * Gets the current thread + */ Thread* GetCurrentThread(); -/// Waits the current thread on a sleep +/** + * Waits the current thread on a sleep + */ void WaitCurrentThread_Sleep(); /** * Waits the current thread from a WaitSynchronization call - * @param wait_object Kernel object that we are waiting on + * @param wait_objects Kernel objects that we are waiting on * @param wait_set_output If true, set the output parameter on thread wakeup (for WaitSynchronizationN only) * @param wait_all If true, wait on all objects before resuming (for WaitSynchronizationN only) */ -void WaitCurrentThread_WaitSynchronization(SharedPtr<WaitObject> wait_object, bool wait_set_output, bool wait_all); +void WaitCurrentThread_WaitSynchronization(std::vector<SharedPtr<WaitObject>> wait_objects, bool wait_set_output, bool wait_all); /** * Waits the current thread from an ArbitrateAddress call @@ -172,14 +217,18 @@ void WaitCurrentThread_ArbitrateAddress(VAddr wait_address); * Sets up the idle thread, this is a thread that is intended to never execute instructions, * only to advance the timing. It is scheduled when there are no other ready threads in the thread queue * and will try to yield on every call. - * @returns The handle of the idle thread + * @return The handle of the idle thread */ SharedPtr<Thread> SetupIdleThread(); -/// Initialize threading +/** + * Initialize threading + */ void ThreadingInit(); -/// Shutdown threading +/** + * Shutdown threading + */ void ThreadingShutdown(); } // namespace diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index 34a27917f..96da29923 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -144,17 +144,17 @@ static ResultCode WaitSynchronization1(Handle handle, s64 nano_seconds) { LOG_TRACE(Kernel_SVC, "called handle=0x%08X(%s:%s), nanoseconds=%lld", handle, object->GetTypeName().c_str(), object->GetName().c_str(), nano_seconds); + HLE::Reschedule(__func__); + // Check for next thread to schedule if (object->ShouldWait()) { object->AddWaitingThread(Kernel::GetCurrentThread()); - Kernel::WaitCurrentThread_WaitSynchronization(object, false, false); + Kernel::WaitCurrentThread_WaitSynchronization({ object }, false, false); // Create an event to wake the thread up after the specified nanosecond delay has passed Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); - HLE::Reschedule(__func__); - // NOTE: output of this SVC will be set later depending on how the thread resumes return RESULT_INVALID; } @@ -212,25 +212,29 @@ static ResultCode WaitSynchronizationN(s32* out, Handle* handles, s32 handle_cou // NOTE: This should deadlock the current thread if no timeout was specified if (!wait_all) { wait_thread = true; - Kernel::WaitCurrentThread_WaitSynchronization(nullptr, true, wait_all); } } + HLE::Reschedule(__func__); + // If thread should wait, then set its state to waiting and then reschedule... if (wait_thread) { // Actually wait the current thread on each object if we decided to wait... + std::vector<SharedPtr<Kernel::WaitObject>> wait_objects; + wait_objects.reserve(handle_count); + for (int i = 0; i < handle_count; ++i) { auto object = Kernel::g_handle_table.GetWaitObject(handles[i]); object->AddWaitingThread(Kernel::GetCurrentThread()); - Kernel::WaitCurrentThread_WaitSynchronization(object, true, wait_all); + wait_objects.push_back(object); } + Kernel::WaitCurrentThread_WaitSynchronization(std::move(wait_objects), true, wait_all); + // Create an event to wake the thread up after the specified nanosecond delay has passed Kernel::GetCurrentThread()->WakeAfterDelay(nano_seconds); - HLE::Reschedule(__func__); - // NOTE: output of this SVC will be set later depending on how the thread resumes return RESULT_INVALID; } @@ -319,7 +323,7 @@ static ResultCode CreateThread(u32* out_handle, u32 priority, u32 entry_point, u } CASCADE_RESULT(SharedPtr<Thread> thread, Kernel::Thread::Create( - name, entry_point, priority, arg, processor_id, stack_top, Kernel::DEFAULT_STACK_SIZE)); + name, entry_point, priority, arg, processor_id, stack_top)); CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); LOG_TRACE(Kernel_SVC, "called entrypoint=0x%08X (%s), arg=0x%08X, stacktop=0x%08X, " @@ -338,7 +342,7 @@ static ResultCode CreateThread(u32* out_handle, u32 priority, u32 entry_point, u static void ExitThread() { LOG_TRACE(Kernel_SVC, "called, pc=0x%08X", Core::g_app_core->GetPC()); - Kernel::GetCurrentThread()->Stop(__func__); + Kernel::GetCurrentThread()->Stop(); HLE::Reschedule(__func__); } |