We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c3657b3, commit d80c3c4. Copy full SHA for d80c3c4
normalizer/normalize.py
@@ -49,7 +49,7 @@ def normalize(
49
50
# apply unicode replacements
51
text = const.UNICODE_REPLACEMENTS_REGEX.sub(
52
- lambda match: const.UNICODE_REPLACEMENTS[match.group(0)],
+ lambda match: const.UNICODE_REPLACEMENTS.get(match.group(0), f"{match.group(1)}\u09cc"),
53
text
54
)
55
tests/test.py
@@ -65,12 +65,16 @@ def test_unicode_replacements():
65
[
66
"\u00a0",
67
" "
68
+ ],
69
+ [
70
+ 'েশৗ',
71
+ 'শৌ'
72
]
73
74
75
for d in data:
76
normalized_text = normalize(d[0])
- assert normalized_text == d[1]
77
+ assert normalized_text == d[1], f"{normalized_text} != {d[1]}"
78
79
80
0 commit comments