From 21bd43c7e5eae76fb2d3738b8f1b1c727b3f545e Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Feb 2026 16:08:01 +0900 Subject: [PATCH 1/5] gh-144586: Improve _Py_yield to improve light weight cpu instruction --- Include/internal/pycore_lock.h | 4 +++- Python/lock.c | 14 +++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index e31d8b4e5c68c9..827366830a6ba6 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -70,7 +70,9 @@ PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) // error messages) otherwise returns 0. extern int _PyMutex_TryUnlock(PyMutex *m); -// Yield the processor to other threads (e.g., sched_yield). +// Yield the processor using a lightweight CPU pause hint (e.g., x86 PAUSE, +// AArch64 WFE). Falls back to sched_yield()/SwitchToThread() on platforms +// without a known pause instruction. extern void _Py_yield(void); diff --git a/Python/lock.c b/Python/lock.c index ad97bfd93c8495..996fbd718a03ef 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -43,7 +43,19 @@ struct mutex_entry { void _Py_yield(void) { -#ifdef MS_WINDOWS +#if defined(__GNUC__) || defined(__clang__) +# if defined(__x86_64__) || defined(__i386__) + __asm__ volatile ("pause" ::: "memory"); +# elif defined(__aarch64__) + __asm__ volatile ("wfe"); +# elif defined(__arm__) && __ARM_ARCH >= 7 + __asm__ volatile ("yield" ::: "memory"); +# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) + __asm__ volatile ("or 27,27,27" ::: "memory"); +# else + sched_yield(); +# endif +#elif defined(MS_WINDOWS) SwitchToThread(); #elif defined(HAVE_SCHED_H) sched_yield(); From 9b90b96fe1b11906233833624d82e3d81a99ba6d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Feb 2026 16:43:11 +0900 Subject: [PATCH 2/5] Add NEWS.d --- .../2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 
Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst new file mode 100644 index 00000000000000..3ce3f57e777d0c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst @@ -0,0 +1,2 @@ +Improve _Py_yield to improve light weight cpu instruction. Patch By Donghee +Na. From d1a986c13246bf5473c945f8b6952ec7c2cb7426 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Feb 2026 22:38:16 +0900 Subject: [PATCH 3/5] Address code review --- Include/internal/pycore_lock.h | 27 +++++++++++++++++++++++---- Python/lock.c | 30 ------------------------------ 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 827366830a6ba6..1fefa0ff7b1826 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -13,6 +13,10 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#if defined(MS_WINDOWS) +# include <intrin.h> // _mm_pause() +#endif + //_Py_UNLOCKED is defined as 0 and _Py_LOCKED as 1 in Include/cpython/pylock.h #define _Py_HAS_PARKED 2 #define _Py_ONCE_INITIALIZED 4 @@ -70,10 +74,25 @@ PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) // error messages) otherwise returns 0. extern int _PyMutex_TryUnlock(PyMutex *m); -// Yield the processor using a lightweight CPU pause hint (e.g., x86 PAUSE, -// AArch64 WFE). Falls back to sched_yield()/SwitchToThread() on platforms -// without a known pause instruction. -extern void _Py_yield(void); +// Lightweight CPU pause hint for spin-wait loops (e.g., x86 PAUSE, AArch64 WFE). +// Falls back to sched_yield() on platforms without a known pause instruction. 
+static inline void +_Py_yield(void) +{ +#if defined(__x86_64__) || defined(__i386__) + __asm__ volatile ("pause" ::: "memory"); +#elif defined(__aarch64__) + __asm__ volatile ("wfe"); +#elif defined(__arm__) && __ARM_ARCH >= 7 + __asm__ volatile ("yield" ::: "memory"); +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) + __asm__ volatile ("or 27,27,27" ::: "memory"); +#elif defined(MS_WINDOWS) + _mm_pause(); +#elif defined(HAVE_SCHED_H) + sched_yield(); +#endif +} // PyEvent is a one-time event notification diff --git a/Python/lock.c b/Python/lock.c index 996fbd718a03ef..1fec9772229651 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -8,14 +8,6 @@ #include "pycore_time.h" // _PyTime_Add() #include "pycore_stats.h" // FT_STAT_MUTEX_SLEEP_INC() -#ifdef MS_WINDOWS -# ifndef WIN32_LEAN_AND_MEAN -# define WIN32_LEAN_AND_MEAN -# endif -# include <windows.h> // SwitchToThread() -#elif defined(HAVE_SCHED_H) -# include <sched.h> // sched_yield() -#endif // If a thread waits on a lock for longer than TIME_TO_BE_FAIR_NS (1 ms), then // the unlocking thread directly hands off ownership of the lock. 
This avoids @@ -40,28 +32,6 @@ struct mutex_entry { int handed_off; }; -void -_Py_yield(void) -{ -#if defined(__GNUC__) || defined(__clang__) -# if defined(__x86_64__) || defined(__i386__) - __asm__ volatile ("pause" ::: "memory"); -# elif defined(__aarch64__) - __asm__ volatile ("wfe"); -# elif defined(__arm__) && __ARM_ARCH >= 7 - __asm__ volatile ("yield" ::: "memory"); -# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) - __asm__ volatile ("or 27,27,27" ::: "memory"); -# else - sched_yield(); -# endif -#elif defined(MS_WINDOWS) - SwitchToThread(); -#elif defined(HAVE_SCHED_H) - sched_yield(); -#endif -} - PyLockStatus _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags) { From 2fe2bbbea7516893fe4f9075ce278980431ebb91 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Feb 2026 22:46:32 +0900 Subject: [PATCH 4/5] Fix for windows --- Include/internal/pycore_lock.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 1fefa0ff7b1826..a6c522bf51dc61 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -14,7 +14,7 @@ extern "C" { #endif #if defined(MS_WINDOWS) -# include <intrin.h> // _mm_pause() +# include <intrin.h> // _mm_pause(), __yield() #endif //_Py_UNLOCKED is defined as 0 and _Py_LOCKED as 1 in Include/cpython/pylock.h @@ -87,8 +87,10 @@ _Py_yield(void) __asm__ volatile ("yield" ::: "memory"); #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) __asm__ volatile ("or 27,27,27" ::: "memory"); -#elif defined(MS_WINDOWS) +#elif defined(_M_X64) || defined(_M_IX86) _mm_pause(); +#elif defined(_M_ARM64) || defined(_M_ARM) + __yield(); #elif defined(HAVE_SCHED_H) sched_yield(); #endif From 29ebf1bd2ffb243b336111f23737a98d2b8b0506 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sun, 8 Feb 2026 23:00:42 +0900 Subject: [PATCH 5/5] nit --- .../2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst index 3ce3f57e777d0c..a9e4122b7423fc 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-02-08-16-43-01.gh-issue-144586.iApw3M.rst @@ -1,2 +1,2 @@ -Improve _Py_yield to improve light weight cpu instruction. Patch By Donghee +Improve _Py_yield to use light weight cpu instruction. Patch By Donghee Na.