@@ -388,6 +388,17 @@ unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
388388 return PyUnicode_FromString (_PyUnicode_EastAsianWidthNames [index ]);
389389}
390390
391+ // Constants for Hangul syllable decomposition: a code point in [SBase, SBase + SCount) is split arithmetically into L, V and optional T parts.
392+ #define SBase 0xAC00  // first code point of the syllable range handled arithmetically
393+ #define LBase 0x1100  // base added to SIndex / NCount to obtain the L part
394+ #define VBase 0x1161  // base added to (SIndex % NCount) / TCount to obtain the V part
395+ #define TBase 0x11A7  // base added to SIndex % TCount; a result equal to TBase means "no T part"
396+ #define LCount 19  // number of distinct L values
397+ #define VCount 21  // number of distinct V values
398+ #define TCount 28  // number of T slots, including the "no T part" slot (index 0)
399+ #define NCount (VCount*TCount)  // syllables per L value (every V/T combination)
400+ #define SCount (LCount*NCount)  // total number of syllables in the range starting at SBase
401+
391402/*[clinic input]
392403unicodedata.UCD.decomposition
393404
@@ -418,6 +429,25 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
418429 return Py_GetConstant (Py_CONSTANT_EMPTY_STR ); /* unassigned */
419430 }
420431
432+ // Hangul Decomposition.
433+ // See section 3.12.2, "Hangul Syllable Decomposition"
434+ // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
435+ if (SBase <= code && code < (SBase + SCount )) {
436+ int SIndex = code - SBase ;
437+ int L = LBase + SIndex / NCount ;
438+ int V = VBase + (SIndex % NCount ) / TCount ;
439+ int T = TBase + SIndex % TCount ;
440+ if (T != TBase ) {
441+ PyOS_snprintf (decomp , sizeof (decomp ),
442+ "%04X %04X %04X" , L , V , T );
443+ }
444+ else {
445+ PyOS_snprintf (decomp , sizeof (decomp ),
446+ "%04X %04X" , L , V );
447+ }
448+ return PyUnicode_FromString (decomp );
449+ }
450+
421451 if (code < 0 || code >= 0x110000 )
422452 index = 0 ;
423453 else {
@@ -480,16 +510,6 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
480510 (* index )++ ;
481511}
482512
483- #define SBase 0xAC00
484- #define LBase 0x1100
485- #define VBase 0x1161
486- #define TBase 0x11A7
487- #define LCount 19
488- #define VCount 21
489- #define TCount 28
490- #define NCount (VCount*TCount)
491- #define SCount (LCount*NCount)
492-
493513static PyObject *
494514nfd_nfkd (PyObject * self , PyObject * input , int k )
495515{
@@ -543,7 +563,9 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
543563 }
544564 output = new_output ;
545565 }
546- /* Hangul Decomposition. */
566+ // Hangul Decomposition.
567+ // See section 3.12.2, "Hangul Syllable Decomposition"
568+ // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
547569 if (SBase <= code && code < (SBase + SCount )) {
548570 int SIndex = code - SBase ;
549571 int L = LBase + SIndex / NCount ;
0 commit comments