Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions Lib/test/test_unicodedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,12 @@ def test_decomposition(self):
# New in 17.0.0
self.assertEqual(self.db.decomposition('\uA7F1'), '' if self.old else '<super> 0053')

# Hangul characters
self.assertEqual(self.db.decomposition('\uAC00'), '1100 1161')
self.assertEqual(self.db.decomposition('\uD4DB'), '1111 1171 11B6')
self.assertEqual(self.db.decomposition('\uC2F8'), '110A 1161')
self.assertEqual(self.db.decomposition('\uD7A3'), '1112 1175 11C2')

self.assertRaises(TypeError, self.db.decomposition)
self.assertRaises(TypeError, self.db.decomposition, 'xx')

Expand Down Expand Up @@ -687,9 +693,9 @@ class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):

# Update this if the database changes. Make sure to do a full rebuild
# (e.g. 'make distclean && make') to get the correct checksum.
expectedchecksum = ('668dbbea1136e69d4f00677a5988b23bc78aefc6'
expectedchecksum = ('00b13fa975a60b1d3f490f1fc8c126ab24990c75'
if quicktest else
'b869af769bd8fe352c04622ab90533dc54df5cf3')
'ebfc9dd281c2226998fd435744dd2e9321899beb')

@requires_resource('network')
def test_all_names(self):
Expand Down Expand Up @@ -977,9 +983,9 @@ def graphemes(*args):
class Unicode_3_2_0_FunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest):
db = unicodedata.ucd_3_2_0
old = True
expectedchecksum = ('2164a66700e03cba9c9f5ed9e9a8d594d2da136a'
expectedchecksum = ('cb5bbbd1f55b67371e18222b90a8e21c87f16b72'
if quicktest else
'a8276cec9b6991779c5bdaa46c1ae7cc50bc2403')
'74936dffe949d99203a47e6a66565b2fc337bae7')


class UnicodeMiscTest(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
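Fix :func:`unicodedata.decomposition` for Hangul characters: precomposed Hangul syllables (U+AC00 through U+D7A3) now return their algorithmically derived decomposition, e.g. ``'1100 1161'`` for U+AC00.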
44 changes: 33 additions & 11 deletions Modules/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,17 @@ unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
return PyUnicode_FromString(_PyUnicode_EastAsianWidthNames[index]);
}

// For Hangul decomposition
//
// Constants of the arithmetic Hangul syllable decomposition algorithm
// (Unicode core specification, section 3.12.2, "Hangul Syllable
// Decomposition").  A precomposed syllable S is split into a leading
// consonant L, a vowel V and an optional trailing consonant T using only
// integer division and remainder on (S - SBase).

// Code point of the first precomposed Hangul syllable.
#define SBase 0xAC00
// Base code point added to the leading-consonant index to obtain L.
#define LBase 0x1100
// Base code point added to the vowel index to obtain V.
#define VBase 0x1161
// Base code point added to the trailing-consonant index to obtain T.
// A computed T equal to TBase means "no trailing consonant", which is why
// the decomposition code tests `T != TBase` before emitting T.
#define TBase 0x11A7
// Number of distinct leading consonants.
#define LCount 19
// Number of distinct vowels.
#define VCount 21
// Number of trailing-consonant slots, including the "no trailing
// consonant" slot at index 0.
#define TCount 28
// Number of syllables sharing one leading consonant (21 * 28 = 588).
#define NCount (VCount*TCount)
// Total number of precomposed syllables (19 * 588 = 11172), so the valid
// range is SBase <= code < SBase + SCount, i.e. U+AC00 through U+D7A3.
#define SCount (LCount*NCount)

/*[clinic input]
@permit_long_summary
unicodedata.UCD.decomposition
Expand Down Expand Up @@ -460,6 +471,25 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
return Py_GetConstant(Py_CONSTANT_EMPTY_STR); /* unassigned */
}

// Hangul Decomposition.
// See section 3.12.2, "Hangul Syllable Decomposition"
// https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
if (SBase <= code && code < (SBase + SCount)) {
int SIndex = code - SBase;
int L = LBase + SIndex / NCount;
int V = VBase + (SIndex % NCount) / TCount;
int T = TBase + SIndex % TCount;
if (T != TBase) {
PyOS_snprintf(decomp, sizeof(decomp),
"%04X %04X %04X", L, V, T);
}
else {
PyOS_snprintf(decomp, sizeof(decomp),
"%04X %04X", L, V);
}
return PyUnicode_FromString(decomp);
}

if (code < 0 || code >= 0x110000)
index = 0;
else {
Expand Down Expand Up @@ -522,16 +552,6 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
(*index)++;
}

#define SBase 0xAC00
#define LBase 0x1100
#define VBase 0x1161
#define TBase 0x11A7
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount*TCount)
#define SCount (LCount*NCount)

static PyObject*
nfd_nfkd(PyObject *self, PyObject *input, int k)
{
Expand Down Expand Up @@ -585,7 +605,9 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
}
output = new_output;
}
/* Hangul Decomposition. */
// Hangul Decomposition.
// See section 3.12.2, "Hangul Syllable Decomposition"
// https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
if (SBase <= code && code < (SBase+SCount)) {
int SIndex = code - SBase;
int L = LBase + SIndex / NCount;
Expand Down
Loading