@@ -386,6 +386,17 @@ unicodedata_UCD_east_asian_width_impl(PyObject *self, int chr)
386386 return PyUnicode_FromString (_PyUnicode_EastAsianWidthNames [index ]);
387387}
388388
389+ // Constants for arithmetic Hangul syllable decomposition (Unicode core spec, section 3.12, "Conjoining Jamo Behavior").
390+ #define SBase 0xAC00  // first code point of the precomposed Hangul syllable range
391+ #define LBase 0x1100  // base code point added to the leading-consonant (L) index
392+ #define VBase 0x1161  // base code point added to the vowel (V) index
393+ #define TBase 0x11A7  // base code point added to the trailing-consonant (T) index; T == TBase means "no trailing consonant"
394+ #define LCount 19  // number of distinct L values
395+ #define VCount 21  // number of distinct V values
396+ #define TCount 28  // number of distinct T values, counting the "no trailing consonant" slot
397+ #define NCount (VCount*TCount)  // syllables per leading consonant (21 * 28 = 588)
398+ #define SCount (LCount*NCount)  // size of the syllable range (19 * 588 = 11172)
399+
389400/*[clinic input]
390401unicodedata.UCD.decomposition
391402
@@ -416,6 +427,25 @@ unicodedata_UCD_decomposition_impl(PyObject *self, int chr)
416427 return PyUnicode_FromString ("" ); /* unassigned */
417428 }
418429
430+ // Hangul Decomposition.
431+ // See section 3.12.2, "Hangul Syllable Decomposition"
432+ // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
433+ if (SBase <= code && code < (SBase + SCount )) {
434+ int SIndex = code - SBase ;
435+ int L = LBase + SIndex / NCount ;
436+ int V = VBase + (SIndex % NCount ) / TCount ;
437+ int T = TBase + SIndex % TCount ;
438+ if (T != TBase ) {
439+ PyOS_snprintf (decomp , sizeof (decomp ),
440+ "%04X %04X %04X" , L , V , T );
441+ }
442+ else {
443+ PyOS_snprintf (decomp , sizeof (decomp ),
444+ "%04X %04X" , L , V );
445+ }
446+ return PyUnicode_FromString (decomp );
447+ }
448+
419449 if (code < 0 || code >= 0x110000 )
420450 index = 0 ;
421451 else {
@@ -478,16 +508,6 @@ get_decomp_record(PyObject *self, Py_UCS4 code,
478508 (* index )++ ;
479509}
480510
481- #define SBase 0xAC00
482- #define LBase 0x1100
483- #define VBase 0x1161
484- #define TBase 0x11A7
485- #define LCount 19
486- #define VCount 21
487- #define TCount 28
488- #define NCount (VCount*TCount)
489- #define SCount (LCount*NCount)
490-
491511static PyObject *
492512nfd_nfkd (PyObject * self , PyObject * input , int k )
493513{
@@ -541,7 +561,9 @@ nfd_nfkd(PyObject *self, PyObject *input, int k)
541561 }
542562 output = new_output ;
543563 }
544- /* Hangul Decomposition. */
564+ // Hangul Decomposition.
565+ // See section 3.12.2, "Hangul Syllable Decomposition"
566+ // https://www.unicode.org/versions/latest/core-spec/chapter-3/#G56669
545567 if (SBase <= code && code < (SBase + SCount )) {
546568 int SIndex = code - SBase ;
547569 int L = LBase + SIndex / NCount ;
0 commit comments