-
-
Notifications
You must be signed in to change notification settings - Fork 34k
gh-144356: Make set iterator __length_hint__ and iternext race-safe under no-gil
#144357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
47d75fe
3e3785c
229ced3
cdcf88a
21f1478
a18c698
79b5fbc
6ac15e0
3222eef
7fb39bc
78241a8
52285a2
c9ece31
e41e852
b32b5ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -148,6 +148,61 @@ def read_set(): | |||||
| for t in threads: | ||||||
| t.join() | ||||||
|
|
||||||
| @threading_helper.reap_threads | ||||||
| def test_length_hint_used_race(self): | ||||||
| NUM_ITERS = 10 | ||||||
| NUM_THREADS = 10 | ||||||
| NUM_LOOPS = 2_000 | ||||||
|
|
||||||
| for _ in range(NUM_ITERS): | ||||||
| s = set(range(2000)) | ||||||
| it = iter(s) | ||||||
|
|
||||||
| def worker(): | ||||||
| for i in range(NUM_LOOPS): | ||||||
| it.__length_hint__() | ||||||
| s.add(i) | ||||||
| s.discard(i - 1) | ||||||
|
|
||||||
| threading_helper.run_concurrently(worker, nthreads=NUM_THREADS) | ||||||
|
|
||||||
| @threading_helper.reap_threads | ||||||
| def test_length_hint_exhaust_race(self): | ||||||
| NUM_ITERS = 50 | ||||||
| NUM_THREADS = 10 | ||||||
|
|
||||||
| for _ in range(NUM_ITERS): | ||||||
| s = set(range(256)) | ||||||
| it = iter(s) | ||||||
|
|
||||||
| def worker(): | ||||||
| while True: | ||||||
| it.__length_hint__() | ||||||
| try: | ||||||
| next(it) | ||||||
| except StopIteration: | ||||||
| break | ||||||
|
|
||||||
| threading_helper.run_concurrently(worker, nthreads=NUM_THREADS) | ||||||
|
|
||||||
| @threading_helper.reap_threads | ||||||
| def test_iternext_concurrent_exhaust_race(self): | ||||||
| NUM_ITERS = 200 | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Reduce this to keep computation time low, same for some of the other parameters. TSAN will catch most threading issues even with a low number of iterations/loops. |
||||||
| NUM_THREADS = 10 | ||||||
|
|
||||||
| for _ in range(NUM_ITERS): | ||||||
| s = set(range(64)) | ||||||
| it = iter(s) | ||||||
|
|
||||||
| def worker(): | ||||||
| while True: | ||||||
| try: | ||||||
| next(it) | ||||||
| except StopIteration: | ||||||
| break | ||||||
|
|
||||||
| threading_helper.run_concurrently(worker, nthreads=NUM_THREADS) | ||||||
|
|
||||||
|
|
||||||
| @threading_helper.requires_working_threading() | ||||||
| class SmallSetTest(RaceTestBase, unittest.TestCase): | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| Fix potential races in set iterators (``__length_hint__`` and iteration) in free-threaded builds. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1056,8 +1056,23 @@ setiter_len(PyObject *op, PyObject *Py_UNUSED(ignored)) | |
| { | ||
| setiterobject *si = (setiterobject*)op; | ||
| Py_ssize_t len = 0; | ||
| if (si->si_set != NULL && si->si_used == si->si_set->used) | ||
|
|
||
| #ifdef Py_GIL_DISABLED | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might work for […]. For several other iterators the approach is to keep the reference […]. Note: I tried creating a minimal example where concurrent iteration fails, but I have not succeeded yet (the example does not crash, although I have not run thread sanitizer on it yet). Test for concurrent iteration on set iterator
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you. I think your points make a lot of sense, and I really appreciate the two links you shared—they helped me get a more complete picture of the iterator-related data race.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we should fix this the same way we have fixed the others and, as Sam suggested, only clear the associated set in non-free-threading builds. The current code is incorrect because it uses try-incref, which can fail spuriously if the set object is not marked to enable try-incref.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we also set Also the two code paths in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks — using a flag makes the code clearer. |
||
| PySetObject *so = si->si_set; | ||
| if (so != NULL) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the FT build |
||
| Py_BEGIN_CRITICAL_SECTION2(op, so); | ||
| if (si->si_pos >= 0 && si->si_used == so->used) | ||
| { | ||
| len = si->len; | ||
| } | ||
| Py_END_CRITICAL_SECTION2(); | ||
| } | ||
| #else | ||
| if (si->si_set != NULL && si->si_used == si->si_set->used) { | ||
| len = si->len; | ||
| } | ||
| #endif | ||
|
|
||
| return PyLong_FromSsize_t(len); | ||
| } | ||
|
|
||
|
|
@@ -1089,17 +1104,22 @@ static PyMethodDef setiter_methods[] = { | |
| {NULL, NULL} /* sentinel */ | ||
| }; | ||
|
|
||
| static PyObject *setiter_iternext(PyObject *self) | ||
| static PyObject * | ||
| setiter_iternext(PyObject *self) | ||
| { | ||
| setiterobject *si = (setiterobject*)self; | ||
| PyObject *key = NULL; | ||
| Py_ssize_t i, mask; | ||
| setentry *entry; | ||
| PySetObject *so = si->si_set; | ||
| #ifndef Py_GIL_DISABLED | ||
| int decref_so = 0; | ||
| #endif | ||
|
|
||
| if (so == NULL) | ||
| if (so == NULL) { | ||
| return NULL; | ||
| assert (PyAnySet_Check(so)); | ||
| } | ||
| assert(PyAnySet_Check(so)); | ||
|
|
||
| Py_ssize_t so_used = FT_ATOMIC_LOAD_SSIZE_RELAXED(so->used); | ||
| Py_ssize_t si_used = FT_ATOMIC_LOAD_SSIZE_RELAXED(si->si_used); | ||
|
|
@@ -1110,26 +1130,53 @@ static PyObject *setiter_iternext(PyObject *self) | |
| return NULL; | ||
| } | ||
|
|
||
| #ifdef Py_GIL_DISABLED | ||
| Py_BEGIN_CRITICAL_SECTION2(self, so); | ||
| #else | ||
| Py_BEGIN_CRITICAL_SECTION(so); | ||
| #endif | ||
|
|
||
| i = si->si_pos; | ||
| assert(i>=0); | ||
| #ifdef Py_GIL_DISABLED | ||
| if (i < 0) { | ||
| /* iterator already exhausted */ | ||
| goto done; | ||
| } | ||
| #endif | ||
|
|
||
| entry = so->table; | ||
| mask = so->mask; | ||
| while (i <= mask && (entry[i].key == NULL || entry[i].key == dummy)) { | ||
| i++; | ||
| } | ||
| if (i <= mask) { | ||
| key = Py_NewRef(entry[i].key); | ||
| si->si_pos = i + 1; | ||
| si->len--; | ||
| } | ||
| Py_END_CRITICAL_SECTION(); | ||
| si->si_pos = i+1; | ||
| if (key == NULL) { | ||
| else { | ||
| /* exhausted */ | ||
| si->si_pos = -1; | ||
| si->len = 0; | ||
| #ifndef Py_GIL_DISABLED | ||
| si->si_set = NULL; | ||
| decref_so = 1; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can do the |
||
| #endif | ||
| } | ||
|
|
||
| #ifdef Py_GIL_DISABLED | ||
| done: | ||
| Py_END_CRITICAL_SECTION2(); | ||
| return key; | ||
| #else | ||
| Py_END_CRITICAL_SECTION(); | ||
|
|
||
| if (decref_so) { | ||
| Py_DECREF(so); | ||
| return NULL; | ||
| } | ||
| si->len--; | ||
| return key; | ||
| #endif | ||
| } | ||
|
|
||
| PyTypeObject PySetIter_Type = { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test_iternext_concurrent_exhaust_race and test_length_hint_exhaust_race tests check slightly different things, but I think they can be combined (to reduce pressure on the CI).