-
-
Notifications
You must be signed in to change notification settings - Fork 34k
Open
Labels
interpreter-core (Objects, Python, Grammar, and Parser dirs) · performance (Performance or resource usage) · topic-free-threading · type-feature (A feature request or enhancement)
Description
Proposal:
_Py_yield() currently relies on sched_yield()/SwitchToThread(), which are OS-level system calls. We can replace these with lightweight CPU pause instructions (x86 PAUSE, AArch64 WFE, etc.), as CPython's bundled mimalloc already does in mi_atomic_yield():
cpython/Include/internal/mimalloc/mimalloc/atomic.h
Lines 323 to 389 in e682141
| // Yield | |
| #if defined(__cplusplus) | |
| #include <thread> | |
| static inline void mi_atomic_yield(void) { | |
| std::this_thread::yield(); | |
| } | |
| #elif defined(_WIN32) | |
| #define WIN32_LEAN_AND_MEAN | |
| #include <windows.h> | |
| static inline void mi_atomic_yield(void) { | |
| YieldProcessor(); | |
| } | |
| #elif defined(__SSE2__) | |
| #include <emmintrin.h> | |
| static inline void mi_atomic_yield(void) { | |
| _mm_pause(); | |
| } | |
| #elif (defined(__GNUC__) || defined(__clang__)) && \ | |
| (defined(__x86_64__) || defined(__i386__) || \ | |
| defined(__aarch64__) || defined(__arm__) || \ | |
| defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)) | |
| #if defined(__x86_64__) || defined(__i386__) | |
| static inline void mi_atomic_yield(void) { | |
| __asm__ volatile ("pause" ::: "memory"); | |
| } | |
| #elif defined(__aarch64__) | |
| static inline void mi_atomic_yield(void) { | |
| __asm__ volatile("wfe"); | |
| } | |
| #elif defined(__arm__) | |
| #if __ARM_ARCH >= 7 | |
| static inline void mi_atomic_yield(void) { | |
| __asm__ volatile("yield" ::: "memory"); | |
| } | |
| #else | |
| static inline void mi_atomic_yield(void) { | |
| __asm__ volatile ("nop" ::: "memory"); | |
| } | |
| #endif | |
| #elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) | |
| #ifdef __APPLE__ | |
| static inline void mi_atomic_yield(void) { | |
| __asm__ volatile ("or r27,r27,r27" ::: "memory"); | |
| } | |
| #else | |
| static inline void mi_atomic_yield(void) { | |
| __asm__ __volatile__ ("or 27,27,27" ::: "memory"); | |
| } | |
| #endif | |
| #endif | |
| #elif defined(__sun) | |
| // Fallback for other archs | |
| #include <synch.h> | |
| static inline void mi_atomic_yield(void) { | |
| smt_pause(); | |
| } | |
| #elif defined(__wasi__) | |
| #include <sched.h> | |
| static inline void mi_atomic_yield(void) { | |
| sched_yield(); | |
| } | |
| #else | |
| #include <unistd.h> | |
| static inline void mi_atomic_yield(void) { | |
| sleep(0); | |
| } | |
| #endif | |
Has this already been discussed elsewhere?
No response given
Links to previous discussion of this feature:
No response
Linked PRs
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
interpreter-core (Objects, Python, Grammar, and Parser dirs) · performance (Performance or resource usage) · topic-free-threading · type-feature (A feature request or enhancement)