Skip to content

Commit dba0a4c

Browse files
authored
Fix the wrong Content-Length in python-server.py for non-ascii characters. (#24480)
Resolves: #24479 `python-server.py` currently uses `sys.stdin.read` for reading the input, and it receives the length in `str` (utf-8 string). ref: https://docs.python.org/3/library/sys.html On the other hand, "Content-Length" is the size in **bytes**, therefore we should not pass `content_length` to `sys.stdin.read`. For example, `print("こんにちは世界")`'s length is 16 in str, but 30 in bytes. ``` >>> len('print("こんにちは世界")') 16 >>> len('print("こんにちは世界")'.encode()) 30 ``` This PR has two changes. 1. Replace `sys.stdin.read(content_length)` with `sys.stdin.buffer.read(content_length).decode()`. 2. Make `_send_message` calculate "Content-Length" from bytes, not str. With these changes, the original issue #24479 is resolved. ![image](https://github.com/user-attachments/assets/20e72a26-d4ad-4e16-9c5b-ed41055c95d9)
1 parent 42b63b9 commit dba0a4c

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

python_files/python_server.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515

1616
def _send_message(msg: str):
17-
length_msg = len(msg)
17+
# Content-Length is the data size in bytes.
18+
length_msg = len(msg.encode())
1819
STDOUT.buffer.write(f"Content-Length: {length_msg}\r\n\r\n{msg}".encode())
1920
STDOUT.buffer.flush()
2021

@@ -55,10 +56,11 @@ def custom_input(prompt=""):
5556
try:
5657
send_request({"prompt": prompt})
5758
headers = get_headers()
59+
# Content-Length is the data size in bytes.
5860
content_length = int(headers.get("Content-Length", 0))
5961

6062
if content_length:
61-
message_text = STDIN.read(content_length)
63+
message_text = STDIN.buffer.read(content_length).decode()
6264
message_json = json.loads(message_text)
6365
return message_json["result"]["userInput"]
6466
except Exception:
@@ -74,10 +76,11 @@ def handle_response(request_id):
7476
while not STDIN.closed:
7577
try:
7678
headers = get_headers()
79+
# Content-Length is the data size in bytes.
7780
content_length = int(headers.get("Content-Length", 0))
7881

7982
if content_length:
80-
message_text = STDIN.read(content_length)
83+
message_text = STDIN.buffer.read(content_length).decode()
8184
message_json = json.loads(message_text)
8285
our_user_input = message_json["result"]["userInput"]
8386
if message_json["id"] == request_id:
@@ -160,7 +163,7 @@ def get_value(self) -> str:
160163
def get_headers():
161164
headers = {}
162165
while True:
163-
line = STDIN.readline().strip()
166+
line = STDIN.buffer.readline().decode().strip()
164167
if not line:
165168
break
166169
name, value = line.split(":", 1)
@@ -172,10 +175,11 @@ def get_headers():
172175
while not STDIN.closed:
173176
try:
174177
headers = get_headers()
178+
# Content-Length is the data size in bytes.
175179
content_length = int(headers.get("Content-Length", 0))
176180

177181
if content_length:
178-
request_text = STDIN.read(content_length)
182+
request_text = STDIN.buffer.read(content_length).decode()
179183
request_json = json.loads(request_text)
180184
if request_json["method"] == "execute":
181185
execute(request_json, USER_GLOBALS)

0 commit comments

Comments
 (0)