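Bugfix: when a regex match has no entry in UNICODE_REPLACEMENTS, fall back to match.group(1) + "\u09cc" instead of raising KeyError (e.g. 'েশৗ' -> 'শৌ')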

abhik1505040 · abhik1505040 · commit d80c3c484e1b · 2022-05-07T17:41:04.000+06:00
diff --git a/normalizer/normalize.py b/normalizer/normalize.py
@@ -49,7 +49,7 @@ def normalize(
 
     # apply unicode replacements    
     text = const.UNICODE_REPLACEMENTS_REGEX.sub(
-        lambda match: const.UNICODE_REPLACEMENTS[match.group(0)], 
+        lambda match: const.UNICODE_REPLACEMENTS.get(match.group(0), f"{match.group(1)}\u09cc"), 
         text
     )
 
diff --git a/tests/test.py b/tests/test.py
@@ -65,12 +65,16 @@ def test_unicode_replacements():
         [
             "\u00a0",
             " "
+        ],
+        [
+            'েশৗ',
+            'শৌ'
         ]
     ]
     
     for d in data:
         normalized_text = normalize(d[0])
-        assert normalized_text == d[1]
+        assert normalized_text == d[1], f"{normalized_text} != {d[1]}" 
 
 
 

Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ def normalize(`
`49`	`49`
`50`	`50`	`# apply unicode replacements`
`51`	`51`	`text = const.UNICODE_REPLACEMENTS_REGEX.sub(`
`52`		`- lambda match: const.UNICODE_REPLACEMENTS[match.group(0)],`
	`52`	`+ lambda match: const.UNICODE_REPLACEMENTS.get(match.group(0), f"{match.group(1)}\u09cc"),`
`53`	`53`	`text`
`54`	`54`	`)`
`55`	`55`
Original file line number	Diff line number	Diff line change
`@@ -65,12 +65,16 @@ def test_unicode_replacements():`
`65`	`65`	`[`
`66`	`66`	`"\u00a0",`
`67`	`67`	`" "`
	`68`	`+ ],`
	`69`	`+ [`
	`70`	`+ 'েশৗ',`
	`71`	`+ 'শৌ'`
`68`	`72`	`]`
`69`	`73`	`]`
`70`	`74`
`71`	`75`	`for d in data:`
`72`	`76`	`normalized_text = normalize(d[0])`
`73`		`- assert normalized_text == d[1]`
	`77`	`+ assert normalized_text == d[1], f"{normalized_text} != {d[1]}"`
`74`	`78`
`75`	`79`
`76`	`80`