From 25e13c4c606d1c5270230f06dc9c9d5dea3ecc18 Mon Sep 17 00:00:00 2001 From: James Gerity Date: Wed, 26 Jul 2023 01:19:16 -0400 Subject: [PATCH 1/5] Add test for case-sensitivity in unicode escape literals --- Lib/test/test_ucn.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index cbfd5af2bb751c..aa3841ee18dd2d 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -236,6 +236,10 @@ def test_issue16335(self, size): x.decode, 'unicode-escape' ) + def test_issue80667(self): + self.assertEqual(str(b'\\N{cjK UniFIeD idEogRAph-732B}', "unicode-escape"), '猫') + self.assertEqual(str(b'\\N{haNGul SYllABle WAe}', "unicode-escape"), '왜') + if __name__ == "__main__": unittest.main() From abd39e40faacdbd9ab6c7888b0fa6393769bcdc0 Mon Sep 17 00:00:00 2001 From: James Gerity Date: Wed, 26 Jul 2023 11:09:14 -0400 Subject: [PATCH 2/5] Use case-insensitive comparison when checking \N escapes --- .../2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst | 2 ++ Modules/unicodedata.c | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst b/Misc/NEWS.d/next/Core and Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst new file mode 100644 index 00000000000000..db87a5ed9c7fc2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst @@ -0,0 +1,2 @@ +Literals using the ``\N{name}`` escape syntax can now construct CJK +ideographs and Hangul syllables using case-insensitive names. 
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index c1e22f3868931f..3edecc5c4ac386 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1181,7 +1181,7 @@ find_syllable(const char *str, int *len, int *pos, int count, int column) len1 = Py_SAFE_DOWNCAST(strlen(s), size_t, int); if (len1 <= *len) continue; - if (strncmp(str, s, len1) == 0) { + if (PyOS_strnicmp(str, s, len1) == 0) { *len = len1; *pos = i; } @@ -1219,7 +1219,7 @@ _getcode(PyObject* self, unsigned int i, incr; /* Check for hangul syllables. */ - if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) { + if (PyOS_strnicmp(name, "HANGUL SYLLABLE ", 16) == 0) { int len, L = -1, V = -1, T = -1; const char *pos = name + 16; find_syllable(pos, &len, &L, LCount, 0); @@ -1237,7 +1237,7 @@ _getcode(PyObject* self, } /* Check for unified ideographs. */ - if (strncmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) { + if (PyOS_strnicmp(name, "CJK UNIFIED IDEOGRAPH-", 22) == 0) { /* Four or five hexdigits must follow. */ v = 0; name += 22; From 82bbba3fd35e9c46120212299d4efa588bb732de Mon Sep 17 00:00:00 2001 From: James Gerity Date: Wed, 26 Jul 2023 13:42:25 -0400 Subject: [PATCH 3/5] Handle lower-case hex digits in CJK ideograph escapes --- Lib/test/test_ucn.py | 1 + Modules/unicodedata.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index aa3841ee18dd2d..00d80676b13a60 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -238,6 +238,7 @@ def test_issue16335(self, size): def test_issue80667(self): self.assertEqual(str(b'\\N{cjK UniFIeD idEogRAph-732B}', "unicode-escape"), '猫') + self.assertEqual(str(b'\\N{cjK UniFIeD idEogRAph-732b}', "unicode-escape"), '猫') self.assertEqual(str(b'\\N{haNGul SYllABle WAe}', "unicode-escape"), '왜') diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 3edecc5c4ac386..83f038cab3a06e 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1246,10 +1246,11 @@ 
_getcode(PyObject* self, return 0; while (namelen--) { v *= 16; - if (*name >= '0' && *name <= '9') - v += *name - '0'; - else if (*name >= 'A' && *name <= 'F') - v += *name - 'A' + 10; + Py_UCS1 c = Py_TOUPPER(*name); + if (c >= '0' && c <= '9') + v += c - '0'; + else if (c >= 'A' && c <= 'F') + v += c - 'A' + 10; else return 0; name++; From ab0ee8ef1db3ad386a0305dd2e9c6c0bfac0fca6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 12 Feb 2026 17:28:17 +0200 Subject: [PATCH 4/5] Move the NEWS file to a path without spaces. --- .../2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/{Core and Builtins => Core_and_Builtins}/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst (100%) diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst similarity index 100% rename from Misc/NEWS.d/next/Core and Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst rename to Misc/NEWS.d/next/Core_and_Builtins/2023-07-26-00-03-00.gh-issue-80667.N7Dh8B.rst From 13571e197c3eb0cde680a5fe42e3034e24b6f8b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 12 Feb 2026 18:12:02 +0200 Subject: [PATCH 5/5] Reorganize tests. 
--- Lib/test/test_ucn.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index 9db1f2f9f7bfc6..0c641a455c0747 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -88,6 +88,9 @@ def test_hangul_syllables(self): self.checkletter("HANGUL SYLLABLE HWEOK", "\ud6f8") self.checkletter("HANGUL SYLLABLE HIH", "\ud7a3") + self.checkletter("haNGul SYllABle WAe", '\uc65c') + self.checkletter("HAngUL syLLabLE waE", '\uc65c') + self.assertRaises(ValueError, unicodedata.name, "\ud7a4") def test_cjk_unified_ideographs(self): @@ -103,6 +106,11 @@ def test_cjk_unified_ideographs(self): self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D") self.checkletter("CJK UNIFIED IDEOGRAPH-3134A", "\U0003134A") + self.checkletter("cjK UniFIeD idEogRAph-3aBc", "\u3abc") + self.checkletter("CJk uNIfiEd IDeOGraPH-3AbC", "\u3abc") + self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd") + self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd") + def test_bmp_characters(self): for code in range(0x10000): char = chr(code) @@ -240,11 +248,6 @@ def test_issue16335(self, size): x.decode, 'unicode-escape' ) - def test_issue80667(self): - self.assertEqual(str(b'\\N{cjK UniFIeD idEogRAph-732B}', "unicode-escape"), '猫') - self.assertEqual(str(b'\\N{cjK UniFIeD idEogRAph-732b}', "unicode-escape"), '猫') - self.assertEqual(str(b'\\N{haNGul SYllABle WAe}', "unicode-escape"), '왜') - if __name__ == "__main__": unittest.main()