Skip to content

Commit d80c3c4

Browse files
committed
Bugfix
1 parent c3657b3 commit d80c3c4

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

normalizer/normalize.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def normalize(
4949

5050
# apply unicode replacements
5151
text = const.UNICODE_REPLACEMENTS_REGEX.sub(
52-
lambda match: const.UNICODE_REPLACEMENTS[match.group(0)],
52+
lambda match: const.UNICODE_REPLACEMENTS.get(match.group(0), f"{match.group(1)}\u09cc"),
5353
text
5454
)
5555

tests/test.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -65,12 +65,16 @@ def test_unicode_replacements():
6565
[
6666
"\u00a0",
6767
" "
68+
],
69+
[
70+
'েশৗ',
71+
'শৌ'
6872
]
6973
]
7074

7175
for d in data:
7276
normalized_text = normalize(d[0])
73-
assert normalized_text == d[1]
77+
assert normalized_text == d[1], f"{normalized_text} != {d[1]}"
7478

7579

7680

0 commit comments

Comments
 (0)