diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h
index e31d8b4e5c68c9..a6c522bf51dc61 100644
--- a/Include/internal/pycore_lock.h
+++ b/Include/internal/pycore_lock.h
@@ -13,6 +13,12 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
+#if defined(MS_WINDOWS)
+#  include <intrin.h>         // _mm_pause(), __yield()
+#elif defined(HAVE_SCHED_H)
+#  include <sched.h>          // sched_yield()
+#endif
+
 //_Py_UNLOCKED is defined as 0 and _Py_LOCKED as 1 in Include/cpython/pylock.h
 #define _Py_HAS_PARKED 2
 #define _Py_ONCE_INITIALIZED 4
@@ -70,8 +76,27 @@ PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags)
 // error messages) otherwise returns 0.
 extern int _PyMutex_TryUnlock(PyMutex *m);
 
-// Yield the processor to other threads (e.g., sched_yield).
-extern void _Py_yield(void);
+// Lightweight CPU pause hint for spin-wait loops (e.g., x86 PAUSE, AArch64 WFE).
+// Falls back to sched_yield() on platforms without a known pause instruction.
+static inline void
+_Py_yield(void)
+{
+#if defined(__x86_64__) || defined(__i386__)
+    __asm__ volatile ("pause" ::: "memory");
+#elif defined(__aarch64__)
+    __asm__ volatile ("wfe" ::: "memory");
+#elif defined(__arm__) && __ARM_ARCH >= 7
+    __asm__ volatile ("yield" ::: "memory");
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
+    __asm__ volatile ("or 27,27,27" ::: "memory");
+#elif defined(_M_X64) || defined(_M_IX86)
+    _mm_pause();
+#elif defined(_M_ARM64) || defined(_M_ARM)
+    __yield();
+#elif defined(HAVE_SCHED_H)
+    sched_yield();
+#endif
+}
 
 
 // PyEvent is a one-time event notification
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst
new file mode 100644
index 00000000000000..a9e4122b7423fc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst
@@ -0,0 +1,2 @@
+Improve ``_Py_yield`` to use a lightweight CPU pause instruction in spin-wait
+loops. Patch by Donghee Na.
diff --git a/Python/lock.c b/Python/lock.c
index ad97bfd93c8495..1fec9772229651 100644
--- a/Python/lock.c
+++ b/Python/lock.c
@@ -8,14 +8,6 @@
 #include "pycore_time.h"          // _PyTime_Add()
 #include "pycore_stats.h"         // FT_STAT_MUTEX_SLEEP_INC()
 
-#ifdef MS_WINDOWS
-#  ifndef WIN32_LEAN_AND_MEAN
-#    define WIN32_LEAN_AND_MEAN
-#  endif
-#  include <windows.h>        // SwitchToThread()
-#elif defined(HAVE_SCHED_H)
-#  include <sched.h>          // sched_yield()
-#endif
 
 // If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then
 // the unlocking thread directly hands off ownership of the lock. This avoids
@@ -40,16 +32,6 @@ struct mutex_entry {
     int handed_off;
 };
 
-void
-_Py_yield(void)
-{
-#ifdef MS_WINDOWS
-    SwitchToThread();
-#elif defined(HAVE_SCHED_H)
-    sched_yield();
-#endif
-}
-
 PyLockStatus
 _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
 {
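
For context only (not part of the patch): a minimal, self-contained sketch of how a pause hint like _Py_yield() is typically used in a bounded spin-wait before a thread gives up and blocks. The names cpu_pause(), MAX_SPIN_COUNT, and try_lock_with_spin() are illustrative, not CPython APIs; the code only roughly mirrors the spin-then-park pattern in Python/lock.c.

/* Illustrative only: a bounded spin-wait on a test-and-set lock using a CPU
 * pause hint, roughly mirroring the spin-then-block pattern the patched
 * _Py_yield() is intended for.  cpu_pause(), MAX_SPIN_COUNT and
 * try_lock_with_spin() are hypothetical names, not CPython APIs. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_SPIN_COUNT 40

static inline void
cpu_pause(void)
{
#if defined(__x86_64__) || defined(__i386__)
    __asm__ volatile ("pause" ::: "memory");    // x86 spin-wait hint
#elif defined(__aarch64__)
    __asm__ volatile ("yield" ::: "memory");    // AArch64 spin-wait hint
#else
    /* no pause hint known for this target; plain busy-wait */
#endif
}

/* Try to acquire the lock, spinning briefly with a pause hint.  A real lock
 * implementation would park (block) the thread once the spin budget is
 * exhausted instead of returning false. */
static bool
try_lock_with_spin(atomic_flag *lock)
{
    for (int i = 0; i < MAX_SPIN_COUNT; i++) {
        if (!atomic_flag_test_and_set_explicit(lock, memory_order_acquire)) {
            return true;    // acquired the lock
        }
        cpu_pause();        // tell the core we are busy-waiting
    }
    return false;           // spin budget exhausted; caller should block
}

int
main(void)
{
    atomic_flag lock = ATOMIC_FLAG_INIT;
    if (try_lock_with_spin(&lock)) {
        puts("lock acquired after spinning");
        atomic_flag_clear_explicit(&lock, memory_order_release);
    }
    return 0;
}

The design idea behind the patch is that a pause instruction keeps the waiting core from hammering the contended cache line and saves power, instead of paying for a full sched_yield()/SwitchToThread() system call on every spin iteration.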