From d9a2e587fca4b4583dcea66d855b36b84320f98f Mon Sep 17 00:00:00 2001 From: NewUserHa <32261870+NewUserHa@users.noreply.github.com> Date: Fri, 6 Feb 2026 01:45:14 +0800 Subject: [PATCH 1/6] gh-142407: Clarify copy performance on Windows in shutil docs (GH-142408) --- Doc/library/shutil.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 2c15fed8dd5e4d..ec3c8d600ad171 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -540,7 +540,9 @@ On Solaris :func:`os.sendfile` is used. On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB instead of 64 KiB) and a :func:`memoryview`-based variant of -:func:`shutil.copyfileobj` is used. +:func:`shutil.copyfileobj` is used, which still reads and writes in a loop. +:func:`shutil.copy2` uses the native ``CopyFile2`` call on Windows, which is the most +efficient method, supports copy-on-write, and preserves metadata. If the fast-copy operation fails and no data was written in the destination file then shutil will silently fallback on using less efficient From 50e107f14996b55c60355fb901513e84cc2e589a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 5 Feb 2026 12:04:12 -0600 Subject: [PATCH 2/6] More realistic lru_cache example (gh-144517) --- Doc/library/functools.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 221c0712c7c96a..b7c34bc64135ba 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -180,8 +180,8 @@ The :mod:`functools` module defines the following functions: the *maxsize* at its default value of 128:: @lru_cache - def count_vowels(sentence): - return sum(sentence.count(vowel) for vowel in 'AEIOUaeiou') + def count_vowels(word): + return sum(word.count(vowel) for vowel in 'AEIOUaeiou') If *maxsize* is set to ``None``, the LRU feature is disabled and the cache can grow without bound. 
From c81e1843d4bc0a51cf4f77d19b5ac4e49f714a0d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 5 Feb 2026 20:06:33 +0200 Subject: [PATCH 3/6] gh-74955: Document that __all__ must contain strings in normalization form NFKC (GH-144504) --- Doc/reference/simple_stmts.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Doc/reference/simple_stmts.rst b/Doc/reference/simple_stmts.rst index 643ca106548367..36b30c9b16b0db 100644 --- a/Doc/reference/simple_stmts.rst +++ b/Doc/reference/simple_stmts.rst @@ -831,7 +831,9 @@ where the :keyword:`import` statement occurs. The *public names* defined by a module are determined by checking the module's namespace for a variable named ``__all__``; if defined, it must be a sequence -of strings which are names defined or imported by that module. The names +of strings which are names defined or imported by that module. +Names containing non-ASCII characters must be in the `normalization form`_ +NFKC; see :ref:`lexical-names-nonascii` for details. The names given in ``__all__`` are all considered public and are required to exist. If ``__all__`` is not defined, the set of public names includes all names found in the module's namespace which do not begin with an underscore character @@ -865,6 +867,8 @@ determine dynamically the modules to be loaded. .. audit-event:: import module,filename,sys.path,sys.meta_path,sys.path_hooks import +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms + .. 
_future: Future statements From 4644fed8190e4646663605f3e824f0767a0d026d Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 5 Feb 2026 21:14:49 +0200 Subject: [PATCH 4/6] gh-144001: Support ignoring the invalid pad character in Base64 decoding (GH-144306) --- Doc/library/base64.rst | 3 + Doc/library/binascii.rst | 3 + Lib/test/test_base64.py | 13 ++-- Lib/test/test_binascii.py | 126 ++++++++++++++++++++++++++------------ Modules/binascii.c | 34 +++++----- 5 files changed, 118 insertions(+), 61 deletions(-) diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 478686bc30035c..554d6e7d04ded2 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -87,6 +87,9 @@ POST request. If *ignorechars* is specified, it should be a :term:`bytes-like object` containing characters to ignore from the input when *validate* is true. + If *ignorechars* contains the pad character ``'='``, the pad characters + present before the end of the encoded data and the excess pad characters + will be ignored. The default value of *validate* is ``True`` if *ignorechars* is specified, ``False`` otherwise. diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index d9f0baedec85f2..eb801175ee6179 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -56,6 +56,9 @@ The :mod:`binascii` module defines the following functions: If *ignorechars* is specified, it should be a :term:`bytes-like object` containing characters to ignore from the input when *strict_mode* is true. + If *ignorechars* contains the pad character ``'='``, the pad characters + present before the end of the encoded data and the excess pad characters + will be ignored. The default value of *strict_mode* is ``True`` if *ignorechars* is specified, ``False`` otherwise. 
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 0f947409f0694b..fef18a1b757c08 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -306,7 +306,7 @@ def test_b64decode_invalid_chars(self): # issue 1466065: Test some invalid characters. tests = ((b'%3d==', b'\xdd', b'%$'), (b'$3d==', b'\xdd', b'%$'), - (b'[==', b'', None), + (b'[==', b'', b'[='), (b'YW]3=', b'am', b']'), (b'3{d==', b'\xdd', b'{}'), (b'3d}==', b'\xdd', b'{}'), @@ -314,6 +314,12 @@ def test_b64decode_invalid_chars(self): (b'!', b'', b'@!'), (b"YWJj\n", b"abc", b'\n'), (b'YWJj\nYWI=', b'abcab', b'\n'), + (b'=YWJj', b'abc', b'='), + (b'Y=WJj', b'abc', b'='), + (b'Y==WJj', b'abc', b'='), + (b'Y===WJj', b'abc', b'='), + (b'YW=Jj', b'abc', b'='), + (b'YWJj=', b'abc', b'='), (b'YW\nJj', b'abc', b'\n'), (b'YW\nJj', b'abc', bytearray(b'\n')), (b'YW\nJj', b'abc', memoryview(b'\n')), @@ -335,9 +341,8 @@ def test_b64decode_invalid_chars(self): with self.assertRaises(binascii.Error): # Even empty ignorechars enables the strict mode. base64.b64decode(bstr, ignorechars=b'') - if ignorechars is not None: - r = base64.b64decode(bstr, ignorechars=ignorechars) - self.assertEqual(r, res) + r = base64.b64decode(bstr, ignorechars=ignorechars) + self.assertEqual(r, res) with self.assertRaises(TypeError): base64.b64decode(b'', ignorechars='') diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 49accb08b62e40..a4928794e0acfb 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -118,66 +118,78 @@ def addnoise(line): # empty strings. TBD: shouldn't it raise an exception instead ? 
self.assertEqual(binascii.a2b_base64(self.type2test(fillers)), b'') - def test_base64_strict_mode(self): - # Test base64 with strict mode on - def _assertRegexTemplate(assert_regex: str, data: bytes, non_strict_mode_expected_result: bytes): + def test_base64_bad_padding(self): + # Test malformed padding + def _assertRegexTemplate(assert_regex, data, + non_strict_mode_expected_result): + data = self.type2test(data) with self.assertRaisesRegex(binascii.Error, assert_regex): - binascii.a2b_base64(self.type2test(data), strict_mode=True) - self.assertEqual(binascii.a2b_base64(self.type2test(data), strict_mode=False), + binascii.a2b_base64(data, strict_mode=True) + self.assertEqual(binascii.a2b_base64(data, strict_mode=False), non_strict_mode_expected_result) - self.assertEqual(binascii.a2b_base64(self.type2test(data)), + self.assertEqual(binascii.a2b_base64(data, strict_mode=True, + ignorechars=b'='), + non_strict_mode_expected_result) + self.assertEqual(binascii.a2b_base64(data), non_strict_mode_expected_result) - def assertExcessData(data, non_strict_mode_expected_result: bytes): - _assertRegexTemplate(r'(?i)Excess data', data, non_strict_mode_expected_result) - - def assertNonBase64Data(data, non_strict_mode_expected_result: bytes): - _assertRegexTemplate(r'(?i)Only base64 data', data, non_strict_mode_expected_result) + def assertLeadingPadding(*args): + _assertRegexTemplate(r'(?i)Leading padding', *args) - def assertLeadingPadding(data, non_strict_mode_expected_result: bytes): - _assertRegexTemplate(r'(?i)Leading padding', data, non_strict_mode_expected_result) + def assertDiscontinuousPadding(*args): + _assertRegexTemplate(r'(?i)Discontinuous padding', *args) - def assertDiscontinuousPadding(data, non_strict_mode_expected_result: bytes): - _assertRegexTemplate(r'(?i)Discontinuous padding', data, non_strict_mode_expected_result) + def assertExcessPadding(*args): + _assertRegexTemplate(r'(?i)Excess padding', *args) - def assertExcessPadding(data, 
non_strict_mode_expected_result: bytes): - _assertRegexTemplate(r'(?i)Excess padding', data, non_strict_mode_expected_result) + def assertInvalidLength(*args): + _assertRegexTemplate(r'(?i)Invalid.+number of data characters', *args) - # Test excess data exceptions - assertExcessData(b'ab==a', b'i') assertExcessPadding(b'ab===', b'i') assertExcessPadding(b'ab====', b'i') - assertNonBase64Data(b'ab==:', b'i') - assertExcessData(b'abc=a', b'i\xb7') - assertNonBase64Data(b'abc=:', b'i\xb7') - assertNonBase64Data(b'ab==\n', b'i') assertExcessPadding(b'abc==', b'i\xb7') assertExcessPadding(b'abc===', b'i\xb7') assertExcessPadding(b'abc====', b'i\xb7') assertExcessPadding(b'abc=====', b'i\xb7') - # Test non-base64 data exceptions - assertNonBase64Data(b'\nab==', b'i') - assertNonBase64Data(b'ab:(){:|:&};:==', b'i') - assertNonBase64Data(b'a\nb==', b'i') - assertNonBase64Data(b'a\x00b==', b'i') - - # Test malformed padding assertLeadingPadding(b'=', b'') assertLeadingPadding(b'==', b'') assertLeadingPadding(b'===', b'') assertLeadingPadding(b'====', b'') assertLeadingPadding(b'=====', b'') + assertLeadingPadding(b'=abcd', b'i\xb7\x1d') + assertLeadingPadding(b'==abcd', b'i\xb7\x1d') + assertLeadingPadding(b'===abcd', b'i\xb7\x1d') + assertLeadingPadding(b'====abcd', b'i\xb7\x1d') + assertLeadingPadding(b'=====abcd', b'i\xb7\x1d') + + assertInvalidLength(b'a=b==', b'i') + assertInvalidLength(b'a=bc=', b'i\xb7') + assertInvalidLength(b'a=bc==', b'i\xb7') + assertInvalidLength(b'a=bcd', b'i\xb7\x1d') + assertInvalidLength(b'a=bcd=', b'i\xb7\x1d') + assertDiscontinuousPadding(b'ab=c=', b'i\xb7') - assertDiscontinuousPadding(b'ab=ab==', b'i\xb6\x9b') - assertNonBase64Data(b'ab=:=', b'i') + assertDiscontinuousPadding(b'ab=cd', b'i\xb7\x1d') + assertDiscontinuousPadding(b'ab=cd==', b'i\xb7\x1d') + assertExcessPadding(b'abcd=', b'i\xb7\x1d') assertExcessPadding(b'abcd==', b'i\xb7\x1d') assertExcessPadding(b'abcd===', b'i\xb7\x1d') assertExcessPadding(b'abcd====', b'i\xb7\x1d') 
assertExcessPadding(b'abcd=====', b'i\xb7\x1d') + assertExcessPadding(b'abcd==', b'i\xb7\x1d') + assertExcessPadding(b'abcd===', b'i\xb7\x1d') + assertExcessPadding(b'abcd====', b'i\xb7\x1d') + assertExcessPadding(b'abcd=====', b'i\xb7\x1d') + assertExcessPadding(b'abcd=efgh', b'i\xb7\x1dy\xf8!') + assertExcessPadding(b'abcd==efgh', b'i\xb7\x1dy\xf8!') + assertExcessPadding(b'abcd===efgh', b'i\xb7\x1dy\xf8!') + assertExcessPadding(b'abcd====efgh', b'i\xb7\x1dy\xf8!') + assertExcessPadding(b'abcd=====efgh', b'i\xb7\x1dy\xf8!') def test_base64_invalidchars(self): + # Test non-base64 data exceptions def assertNonBase64Data(data, expected, ignorechars): data = self.type2test(data) assert_regex = r'(?i)Only base64 data' @@ -195,10 +207,11 @@ def assertNonBase64Data(data, expected, ignorechars): assertNonBase64Data(b'ab:(){:|:&};:==', b'i', ignorechars=b':;(){}|&') assertNonBase64Data(b'a\nb==', b'i', ignorechars=b'\n') assertNonBase64Data(b'a\x00b==', b'i', ignorechars=b'\x00') + assertNonBase64Data(b'ab:==', b'i', ignorechars=b':') + assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':') assertNonBase64Data(b'ab==:', b'i', ignorechars=b':') assertNonBase64Data(b'abc=:', b'i\xb7', ignorechars=b':') assertNonBase64Data(b'ab==\n', b'i', ignorechars=b'\n') - assertNonBase64Data(b'ab=:=', b'i', ignorechars=b':') assertNonBase64Data(b'a\nb==', b'i', ignorechars=bytearray(b'\n')) assertNonBase64Data(b'a\nb==', b'i', ignorechars=memoryview(b'\n')) @@ -221,11 +234,37 @@ def assertNonBase64Data(data, expected, ignorechars): with self.assertRaises(TypeError): binascii.a2b_base64(data, ignorechars=None) + def test_base64_excess_data(self): + # Test excess data exceptions + def assertExcessData(data, non_strict_expected, + ignore_padchar_expected=None): + assert_regex = r'(?i)Excess data' + data = self.type2test(data) + with self.assertRaisesRegex(binascii.Error, assert_regex): + binascii.a2b_base64(data, strict_mode=True) + self.assertEqual(binascii.a2b_base64(data, 
strict_mode=False), + non_strict_expected) + if ignore_padchar_expected is not None: + self.assertEqual(binascii.a2b_base64(data, strict_mode=True, + ignorechars=b'='), + ignore_padchar_expected) + self.assertEqual(binascii.a2b_base64(data), non_strict_expected) + + assertExcessData(b'ab==c', b'i') + assertExcessData(b'ab==cd', b'i', b'i\xb7\x1d') + assertExcessData(b'abc=d', b'i\xb7', b'i\xb7\x1d') + def test_base64errors(self): # Test base64 with invalid padding - def assertIncorrectPadding(data): + def assertIncorrectPadding(data, strict_mode=True): + data = self.type2test(data) with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'): - binascii.a2b_base64(self.type2test(data)) + binascii.a2b_base64(data) + with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'): + binascii.a2b_base64(data, strict_mode=False) + if strict_mode: + with self.assertRaisesRegex(binascii.Error, r'(?i)Incorrect padding'): + binascii.a2b_base64(data, strict_mode=True) assertIncorrectPadding(b'ab') assertIncorrectPadding(b'ab=') @@ -233,16 +272,22 @@ def assertIncorrectPadding(data): assertIncorrectPadding(b'abcdef') assertIncorrectPadding(b'abcdef=') assertIncorrectPadding(b'abcdefg') - assertIncorrectPadding(b'a=b=') - assertIncorrectPadding(b'a\nb=') + assertIncorrectPadding(b'a=b=', strict_mode=False) + assertIncorrectPadding(b'a\nb=', strict_mode=False) # Test base64 with invalid number of valid characters (1 mod 4) - def assertInvalidLength(data): + def assertInvalidLength(data, strict_mode=True): n_data_chars = len(re.sub(br'[^A-Za-z0-9/+]', br'', data)) + data = self.type2test(data) expected_errmsg_re = \ r'(?i)Invalid.+number of data characters.+' + str(n_data_chars) with self.assertRaisesRegex(binascii.Error, expected_errmsg_re): - binascii.a2b_base64(self.type2test(data)) + binascii.a2b_base64(data) + with self.assertRaisesRegex(binascii.Error, expected_errmsg_re): + binascii.a2b_base64(data, strict_mode=False) + if strict_mode: + with 
self.assertRaisesRegex(binascii.Error, expected_errmsg_re): + binascii.a2b_base64(data, strict_mode=True) assertInvalidLength(b'a') assertInvalidLength(b'a=') @@ -250,7 +295,8 @@ def assertInvalidLength(data): assertInvalidLength(b'a===') assertInvalidLength(b'a' * 5) assertInvalidLength(b'a' * (4 * 87 + 1)) - assertInvalidLength(b'A\tB\nC ??DE') # only 5 valid characters + assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters + strict_mode=False) def test_uu(self): MAX_UU = 45 diff --git a/Modules/binascii.c b/Modules/binascii.c index 201e7798bb7a8c..6d3d4e1a6d6daa 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -564,26 +564,24 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, pads++; if (strict_mode) { - if (quad_pos == 0) { - state = get_binascii_state(module); - if (state) { - PyErr_SetString(state->Error, (ascii_data == data->buf) - ? "Leading padding not allowed" - : "Excess padding not allowed"); - } - goto error_end; + if (quad_pos >= 2 && quad_pos + pads <= 4) { + continue; + } + if (ignorechar(BASE64_PAD, ignorechars, ignorecache)) { + continue; } if (quad_pos == 1) { /* Set an error below. */ break; } - if (quad_pos + pads > 4) { - state = get_binascii_state(module); - if (state) { - PyErr_SetString(state->Error, "Excess padding not allowed"); - } - goto error_end; + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, + (quad_pos == 0 && ascii_data == data->buf) + ? 
"Leading padding not allowed" + : "Excess padding not allowed"); } + goto error_end; } else { if (quad_pos >= 2 && quad_pos + pads >= 4) { @@ -592,8 +590,8 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, */ goto done; } + continue; } - continue; } unsigned char v = table_a2b_base64[this_ch]; @@ -609,7 +607,9 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, } // Characters that are not '=', in the middle of the padding, are not allowed - if (strict_mode && pads) { + if (pads && strict_mode && + !ignorechar(BASE64_PAD, ignorechars, ignorecache)) + { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, (quad_pos + pads == 4) @@ -662,7 +662,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } - if (quad_pos != 0 && quad_pos + pads != 4) { + if (quad_pos != 0 && quad_pos + pads < 4) { state = get_binascii_state(module); if (state) { PyErr_SetString(state->Error, "Incorrect padding"); From 01a1dd283b2d39af822f38f005233d1f5cadc927 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Thu, 5 Feb 2026 11:50:51 -0800 Subject: [PATCH 5/6] gh-77188: Add support for pickling private methods and nested classes (GH-21480) Co-authored-by: Serhiy Storchaka --- Doc/whatsnew/3.15.rst | 7 +++ Include/internal/pycore_symtable.h | 7 ++- Lib/pickle.py | 11 +++++ Lib/test/picklecommon.py | 45 +++++++++++++++++++ Lib/test/pickletester.py | 27 +++++++++++ ...0-07-14-23-54-18.gh-issue-77188.TyI3_Q.rst | 1 + Modules/_pickle.c | 41 +++++++++++++++++ Objects/classobject.c | 15 +++++++ Python/symtable.c | 21 +++++++++ 9 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2020-07-14-23-54-18.gh-issue-77188.TyI3_Q.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 05cd7404066167..20250003dca34e 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -704,6 +704,13 @@ os.path (Contributed by Petr 
Viktorin for :cve:`2025-4517`.) +pickle +------ + +* Add support for pickling private methods and nested classes. + (Contributed by Zackery Spytz and Serhiy Storchaka in :gh:`77188`.) + + resource -------- diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h index 9dbfa913219afa..c0164507ea033e 100644 --- a/Include/internal/pycore_symtable.h +++ b/Include/internal/pycore_symtable.h @@ -151,7 +151,12 @@ extern int _PySymtable_LookupOptional(struct symtable *, void *, PySTEntryObject extern void _PySymtable_Free(struct symtable *); extern PyObject *_Py_MaybeMangle(PyObject *privateobj, PySTEntryObject *ste, PyObject *name); -extern PyObject* _Py_Mangle(PyObject *p, PyObject *name); + +// Export for '_pickle' shared extension +PyAPI_FUNC(PyObject *) +_Py_Mangle(PyObject *, PyObject *); +PyAPI_FUNC(int) +_Py_IsPrivateName(PyObject *); /* Flags for def-use information */ diff --git a/Lib/pickle.py b/Lib/pickle.py index 71c12c50f7f035..3e7cf25cb05337 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1175,6 +1175,17 @@ def save_global(self, obj, name=None): if name is None: name = obj.__name__ + if '.__' in name: + # Mangle names of private attributes. 
+ dotted_path = name.split('.') + for i, subpath in enumerate(dotted_path): + if i and subpath.startswith('__') and not subpath.endswith('__'): + prev = prev.lstrip('_') + if prev: + dotted_path[i] = f"_{prev.lstrip('_')}{subpath}" + prev = subpath + name = '.'.join(dotted_path) + module_name = whichmodule(obj, name) if self.proto >= 2: code = _extension_registry.get((module_name, name), _NoValue) diff --git a/Lib/test/picklecommon.py b/Lib/test/picklecommon.py index 4c19b6c421fc61..b749ee09f564bf 100644 --- a/Lib/test/picklecommon.py +++ b/Lib/test/picklecommon.py @@ -388,3 +388,48 @@ def pie(self): class Subclass(tuple): class Nested(str): pass + +# For test_private_methods +class PrivateMethods: + def __init__(self, value): + self.value = value + + def __private_method(self): + return self.value + + def get_method(self): + return self.__private_method + + @classmethod + def get_unbound_method(cls): + return cls.__private_method + + @classmethod + def __private_classmethod(cls): + return 43 + + @classmethod + def get_classmethod(cls): + return cls.__private_classmethod + + @staticmethod + def __private_staticmethod(): + return 44 + + @classmethod + def get_staticmethod(cls): + return cls.__private_staticmethod + +# For test_private_nested_classes +class PrivateNestedClasses: + @classmethod + def get_nested(cls): + return cls.__Nested + + class __Nested: + @classmethod + def get_nested2(cls): + return cls.__Nested2 + + class __Nested2: + pass diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index d2b8d036bfd9e7..7b1b117d6d3e32 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -4118,6 +4118,33 @@ def test_c_methods(self): with self.subTest(proto=proto, descr=descr): self.assertRaises(TypeError, self.dumps, descr, proto) + def test_private_methods(self): + if self.py_version < (3, 15): + self.skipTest('not supported in Python < 3.15') + obj = PrivateMethods(42) + for proto in protocols: + with self.subTest(proto=proto): + 
unpickled = self.loads(self.dumps(obj.get_method(), proto)) + self.assertEqual(unpickled(), 42) + unpickled = self.loads(self.dumps(obj.get_unbound_method(), proto)) + self.assertEqual(unpickled(obj), 42) + unpickled = self.loads(self.dumps(obj.get_classmethod(), proto)) + self.assertEqual(unpickled(), 43) + unpickled = self.loads(self.dumps(obj.get_staticmethod(), proto)) + self.assertEqual(unpickled(), 44) + + def test_private_nested_classes(self): + if self.py_version < (3, 15): + self.skipTest('not supported in Python < 3.15') + cls1 = PrivateNestedClasses.get_nested() + cls2 = cls1.get_nested2() + for proto in protocols: + with self.subTest(proto=proto): + unpickled = self.loads(self.dumps(cls1, proto)) + self.assertIs(unpickled, cls1) + unpickled = self.loads(self.dumps(cls2, proto)) + self.assertIs(unpickled, cls2) + def test_object_with_attrs(self): obj = Object() obj.a = 1 diff --git a/Misc/NEWS.d/next/Library/2020-07-14-23-54-18.gh-issue-77188.TyI3_Q.rst b/Misc/NEWS.d/next/Library/2020-07-14-23-54-18.gh-issue-77188.TyI3_Q.rst new file mode 100644 index 00000000000000..3e956409d52a58 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-07-14-23-54-18.gh-issue-77188.TyI3_Q.rst @@ -0,0 +1 @@ +The :mod:`pickle` module now properly handles name-mangled private methods. 
diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 063547c9a4d020..a897e45f00fab6 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -19,6 +19,7 @@ #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_runtime.h" // _Py_ID() #include "pycore_setobject.h" // _PySet_NextEntry() +#include "pycore_symtable.h" // _Py_Mangle() #include "pycore_sysmodule.h" // _PySys_GetSizeOf() #include "pycore_unicodeobject.h" // _PyUnicode_EqualToASCIIString() @@ -1928,6 +1929,37 @@ get_dotted_path(PyObject *name) return PyUnicode_Split(name, _Py_LATIN1_CHR('.'), -1); } +static PyObject * +join_dotted_path(PyObject *dotted_path) +{ + return PyUnicode_Join(_Py_LATIN1_CHR('.'), dotted_path); +} + +/* Returns -1 (with an exception set) on error, 0 if there were no changes, + * 1 if some names were mangled. */ +static int +mangle_dotted_path(PyObject *dotted_path) +{ + int rc = 0; + Py_ssize_t n = PyList_GET_SIZE(dotted_path); + for (Py_ssize_t i = n-1; i > 0; i--) { + PyObject *subpath = PyList_GET_ITEM(dotted_path, i); + if (_Py_IsPrivateName(subpath)) { + PyObject *parent = PyList_GET_ITEM(dotted_path, i-1); + PyObject *mangled = _Py_Mangle(parent, subpath); + if (mangled == NULL) { + return -1; + } + if (mangled != subpath) { + rc = 1; + } + PyList_SET_ITEM(dotted_path, i, mangled); + Py_DECREF(subpath); + } + } + return rc; +} + static int check_dotted_path(PickleState *st, PyObject *obj, PyObject *dotted_path) { @@ -3809,6 +3841,15 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, dotted_path = get_dotted_path(global_name); if (dotted_path == NULL) goto error; + switch (mangle_dotted_path(dotted_path)) { + case -1: + goto error; + case 1: + Py_SETREF(global_name, join_dotted_path(dotted_path)); + if (global_name == NULL) { + goto error; + } + } module_name = whichmodule(st, obj, global_name, dotted_path); if (module_name == NULL) goto error; diff --git a/Objects/classobject.c b/Objects/classobject.c index e71f301f2efd77..4c99c194df53a5 
100644 --- a/Objects/classobject.c +++ b/Objects/classobject.c @@ -7,6 +7,7 @@ #include "pycore_object.h" #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_symtable.h" // _Py_Mangle() #include "pycore_weakref.h" // FT_CLEAR_WEAKREFS() @@ -143,6 +144,20 @@ method___reduce___impl(PyMethodObject *self) if (funcname == NULL) { return NULL; } + if (_Py_IsPrivateName(funcname)) { + PyObject *classname = PyType_Check(funcself) + ? PyType_GetName((PyTypeObject *)funcself) + : PyType_GetName(Py_TYPE(funcself)); + if (classname == NULL) { + Py_DECREF(funcname); + return NULL; + } + Py_SETREF(funcname, _Py_Mangle(classname, funcname)); + Py_DECREF(classname); + if (funcname == NULL) { + return NULL; + } + } return Py_BuildValue( "N(ON)", _PyEval_GetBuiltin(&_Py_ID(getattr)), funcself, funcname); } diff --git a/Python/symtable.c b/Python/symtable.c index 29cf9190a4e95b..29ac8f6880c575 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -3183,6 +3183,27 @@ _Py_MaybeMangle(PyObject *privateobj, PySTEntryObject *ste, PyObject *name) return _Py_Mangle(privateobj, name); } +int +_Py_IsPrivateName(PyObject *ident) +{ + if (!PyUnicode_Check(ident)) { + return 0; + } + Py_ssize_t nlen = PyUnicode_GET_LENGTH(ident); + if (nlen < 3 || + PyUnicode_READ_CHAR(ident, 0) != '_' || + PyUnicode_READ_CHAR(ident, 1) != '_') + { + return 0; + } + if (PyUnicode_READ_CHAR(ident, nlen-1) == '_' && + PyUnicode_READ_CHAR(ident, nlen-2) == '_') + { + return 0; /* Don't mangle __whatever__ */ + } + return 1; +} + PyObject * _Py_Mangle(PyObject *privateobj, PyObject *ident) { From 957f9fe162398fceeaa9ddba8b40046b8a03176d Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Thu, 5 Feb 2026 14:37:05 -0600 Subject: [PATCH 6/6] gh-74453: Deprecate os.path.commonprefix (#144436) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> 
Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/deprecations/pending-removal-in-future.rst | 8 ++++++++ Doc/library/os.path.rst | 8 ++++++++ Lib/genericpath.py | 9 +++++++++ Lib/posixpath.py | 2 +- Lib/test/test_genericpath.py | 9 +++++++-- Lib/test/test_ntpath.py | 5 +++++ Lib/unittest/util.py | 13 +++++++++++-- .../2026-02-02-12-09-38.gh-issue-74453.19h4Z5.rst | 8 ++++++++ 8 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-02-02-12-09-38.gh-issue-74453.19h4Z5.rst diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 301867416701ea..a54f98d6866e9f 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -78,6 +78,14 @@ although there is currently no date scheduled for their removal. * :mod:`os`: Calling :func:`os.register_at_fork` in a multi-threaded process. +* :mod:`os.path`: :func:`os.path.commonprefix` is deprecated, use + :func:`os.path.commonpath` for path prefixes. The :func:`os.path.commonprefix` + function is being deprecated due to having a misleading name and module. + The function is not safe to use for path prefixes despite being included in a + module about path manipulation, meaning it is easy to accidentally + introduce path traversal vulnerabilities into Python programs by using this + function. + * :class:`!pydoc.ErrorDuringImport`: A tuple value for *exc_info* parameter is deprecated, use an exception instance. diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index bfd59fc5a82049..409fcf4adb754b 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -120,6 +120,14 @@ the :mod:`glob` module.) .. versionchanged:: 3.6 Accepts a :term:`path-like object`. + .. deprecated:: next + Deprecated in favor of :func:`os.path.commonpath` for path prefixes. 
+ The :func:`os.path.commonprefix` function is being deprecated due to + having a misleading name and module. The function is not safe to use for + path prefixes despite being included in a module about path manipulation, + meaning it is easy to accidentally introduce path traversal + vulnerabilities into Python programs by using this function. + .. function:: dirname(path, /) diff --git a/Lib/genericpath.py b/Lib/genericpath.py index 7588fe5e8020f9..71ae19190839ae 100644 --- a/Lib/genericpath.py +++ b/Lib/genericpath.py @@ -105,6 +105,15 @@ def getctime(filename, /): # Return the longest prefix of all list elements. def commonprefix(m, /): "Given a list of pathnames, returns the longest common leading component" + import warnings + warnings.warn('os.path.commonprefix() is deprecated. Use ' + 'os.path.commonpath() for longest path prefix.', + category=DeprecationWarning, + stacklevel=2) + return _commonprefix(m) + +def _commonprefix(m, /): + "Internal implementation of commonprefix()" if not m: return '' # Some people pass in a list of pathname parts to operate in an OS-agnostic # fashion; don't try to translate in that case as that's an abuse of the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 1ee27de3206c7f..8025b063397a03 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -542,7 +542,7 @@ def relpath(path, start=None): start_list = start_tail.split(sep) if start_tail else [] path_list = path_tail.split(sep) if path_tail else [] # Work out how much of the filepath is shared by start and path. 
- i = len(commonprefix([start_list, path_list])) + i = len(genericpath._commonprefix([start_list, path_list])) rel_list = [pardir] * (len(start_list)-i) + path_list[i:] if not rel_list: diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index dfc0817da45fa2..10d3f409d883c5 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -34,6 +34,10 @@ def test_no_argument(self): .format(self.pathmodule.__name__, attr)) def test_commonprefix(self): + with warnings_helper.check_warnings((".*commonpath().*", DeprecationWarning)): + self.do_test_commonprefix() + + def do_test_commonprefix(self): commonprefix = self.pathmodule.commonprefix self.assertEqual( commonprefix([]), @@ -606,8 +610,9 @@ def test_path_isdir(self): self.assertPathEqual(os.path.isdir) def test_path_commonprefix(self): - self.assertEqual(os.path.commonprefix([self.file_path, self.file_name]), - self.file_name) + with warnings_helper.check_warnings((".*commonpath().*", DeprecationWarning)): + self.assertEqual(os.path.commonprefix([self.file_path, self.file_name]), + self.file_name) def test_path_getsize(self): self.assertPathEqual(os.path.getsize) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 3a3c60dea1345f..a3728b58335e63 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -10,6 +10,7 @@ from ntpath import ALL_BUT_LAST, ALLOW_MISSING from test import support from test.support import os_helper +from test.support import warnings_helper from test.support.os_helper import FakePath from test import test_genericpath from tempfile import TemporaryFile @@ -298,6 +299,10 @@ def test_isabs(self): tester('ntpath.isabs("\\\\.\\C:")', 1) def test_commonprefix(self): + with warnings_helper.check_warnings((".*commonpath().*", DeprecationWarning)): + self.do_test_commonprefix() + + def do_test_commonprefix(self): tester('ntpath.commonprefix(["/home/swenson/spam", "/home/swen/spam"])', "/home/swen") 
tester('ntpath.commonprefix(["\\home\\swen\\spam", "\\home\\swen\\eggs"])', diff --git a/Lib/unittest/util.py b/Lib/unittest/util.py index 050eaed0b3f58f..c7e6b941978cd5 100644 --- a/Lib/unittest/util.py +++ b/Lib/unittest/util.py @@ -1,7 +1,6 @@ """Various utility functions.""" from collections import namedtuple, Counter -from os.path import commonprefix __unittest = True @@ -21,13 +20,23 @@ def _shorten(s, prefixlen, suffixlen): s = '%s[%d chars]%s' % (s[:prefixlen], skip, s[len(s) - suffixlen:]) return s +def _common_prefix(m): + if not m: + return "" + s1 = min(m) + s2 = max(m) + for i, c in enumerate(s1): + if c != s2[i]: + return s1[:i] + return s1 + def _common_shorten_repr(*args): args = tuple(map(safe_repr, args)) maxlen = max(map(len, args)) if maxlen <= _MAX_LENGTH: return args - prefix = commonprefix(args) + prefix = _common_prefix(args) prefixlen = len(prefix) common_len = _MAX_LENGTH - \ diff --git a/Misc/NEWS.d/next/Library/2026-02-02-12-09-38.gh-issue-74453.19h4Z5.rst b/Misc/NEWS.d/next/Library/2026-02-02-12-09-38.gh-issue-74453.19h4Z5.rst new file mode 100644 index 00000000000000..8629c834e5b0cd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-02-12-09-38.gh-issue-74453.19h4Z5.rst @@ -0,0 +1,8 @@ +Deprecate :func:`os.path.commonprefix` in favor of +:func:`os.path.commonpath` for path segment prefixes. + +The :func:`os.path.commonprefix` function is being deprecated due to +having a misleading name and module. The function is not safe to use for +path prefixes despite being included in a module about path manipulation, +meaning it is easy to accidentally introduce path traversal +vulnerabilities into Python programs by using this function.