Skip to content

Commit e43dce4

Browse files
author
Konstantinos Bairaktaris
authored
Merge pull request #264 from transifex/surrogates
KEYVALUEJSON: properly unescape surrogates
2 parents 36ec44b + 2df1dae commit e43dce4

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

openformats/utils/json.py

+8
Original file line number | Diff line number | Diff line change
@@ -395,6 +395,14 @@ def _unescape_generator(string):
395 395
yield u'u'
396 396
ptr += 2
397 397
continue
398+
# Surrogates: https://unicode.org/faq/utf_bom.html#utf16-2
399+
if 0xd800 <= ord(unescaped) <= 0xdfff:
400+
unicode_escaped = string[ptr:ptr+12]
401+
escaped = json.loads('"' + unicode_escaped + '"')
402+
if len(escaped) == 1:
403+
yield escaped
404+
ptr += 12
405+
continue
398 406
yield unescaped
399 407
ptr += 6
400 408

0 commit comments

Comments
 (0)