fix: handle ClientDisconnect gracefully instead of returning HTTP 500

skyvanguard · skyvanguard · commit f501dcc2e7c0 · 2026-01-24T09:01:57.000-03:00
When a client disconnects during a request (network timeout, user cancels, load balancer timeout, mobile network interruption), the server was catching the resulting exception with a broad `except Exception` handler, logging it as ERROR with a full traceback, and returning HTTP 500.

`ClientDisconnect` is a client-side event, not a server failure. This change catches it explicitly at the request dispatch level and in the SSE stream handlers, and logs it at DEBUG level instead.

Changes:
- Import `ClientDisconnect` from `starlette.requests`
- Add an `except ClientDisconnect` handler in `handle_request()` to catch disconnects across all HTTP methods (POST, GET, DELETE)
- Add handlers in the `_handle_get_request` SSE streams and in event replay to prevent ERROR logging on client disconnect
- Add regression tests verifying that no ERROR logs are produced and that the server remains healthy after a client disconnects

Github-Issue: #1648
Reported-by: FanisPapakonstantinou
diff --git a/src/mcp/server/streamable_http.py b/src/mcp/server/streamable_http.py
@@ -20,7 +20,7 @@
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 from pydantic import ValidationError
 from sse_starlette import EventSourceResponse
-from starlette.requests import Request
+from starlette.requests import ClientDisconnect, Request
 from starlette.responses import Response
 from starlette.types import Receive, Scope, Send
 
@@ -379,14 +379,17 @@ async def handle_request(self, scope: Scope, receive: Receive, send: Send) -> No
             await response(scope, receive, send)
             return
 
-        if request.method == "POST":
-            await self._handle_post_request(scope, request, receive, send)
-        elif request.method == "GET":  # pragma: no cover
-            await self._handle_get_request(request, send)
-        elif request.method == "DELETE":  # pragma: no cover
-            await self._handle_delete_request(request, send)
-        else:  # pragma: no cover
-            await self._handle_unsupported_request(request, send)
+        try:
+            if request.method == "POST":
+                await self._handle_post_request(scope, request, receive, send)
+            elif request.method == "GET":  # pragma: no cover
+                await self._handle_get_request(request, send)
+            elif request.method == "DELETE":  # pragma: no cover
+                await self._handle_delete_request(request, send)
+            else:  # pragma: no cover
+                await self._handle_unsupported_request(request, send)
+        except ClientDisconnect:
+            logger.debug(f"Client disconnected during {request.method} request")
 
     def _check_accept_headers(self, request: Request) -> tuple[bool, bool]:
         """Check if the request accepts the required media types."""
@@ -704,6 +707,8 @@ async def standalone_sse_writer():
                         # Send the message via SSE
                         event_data = self._create_event_data(event_message)
                         await sse_stream_writer.send(event_data)
+            except ClientDisconnect:
+                logger.debug("Client disconnected from standalone SSE stream")
             except Exception:
                 logger.exception("Error in standalone SSE writer")
             finally:
@@ -720,6 +725,11 @@ async def standalone_sse_writer():
         try:
             # This will send headers immediately and establish the SSE connection
             await response(request.scope, request.receive, send)
+        except ClientDisconnect:
+            logger.debug("Client disconnected from GET SSE stream")
+            await sse_stream_writer.aclose()
+            await sse_stream_reader.aclose()
+            await self._clean_up_memory_streams(GET_STREAM_KEY)
         except Exception:
             logger.exception("Error in standalone SSE response")
             await sse_stream_writer.aclose()
@@ -910,6 +920,8 @@ async def send_event(event_message: EventMessage) -> None:
                 except anyio.ClosedResourceError:
                     # Expected when close_sse_stream() is called
                     logger.debug("Replay SSE stream closed by close_sse_stream()")
+                except ClientDisconnect:
+                    logger.debug("Client disconnected during event replay")
                 except Exception:
                     logger.exception("Error in replay sender")
 
@@ -922,12 +934,16 @@ async def send_event(event_message: EventMessage) -> None:
 
             try:
                 await response(request.scope, request.receive, send)
+            except ClientDisconnect:
+                logger.debug("Client disconnected during replay response")
             except Exception:
                 logger.exception("Error in replay response")
             finally:
                 await sse_stream_writer.aclose()
                 await sse_stream_reader.aclose()
 
+        except ClientDisconnect:
+            logger.debug("Client disconnected during event replay request")
         except Exception:
             logger.exception("Error replaying events")
             response = self._create_error_response(
diff --git a/tests/issues/test_1648_client_disconnect_500.py b/tests/issues/test_1648_client_disconnect_500.py
@@ -0,0 +1,204 @@
+"""Test for issue #1648 - ClientDisconnect returns HTTP 500.
+
+When a client disconnects during a request (network timeout, user cancels, load
+balancer timeout, mobile network interruption), the server should handle this
+gracefully instead of returning HTTP 500 and logging as ERROR.
+
+ClientDisconnect is a client-side event, not a server failure.
+"""
+
+import logging
+import threading
+from collections.abc import AsyncGenerator
+from contextlib import asynccontextmanager
+
+import anyio
+import httpx
+import pytest
+from starlette.applications import Starlette
+from starlette.routing import Mount
+
+from mcp.server import Server
+from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
+from mcp.types import Tool
+
+SERVER_NAME = "test_client_disconnect_server"
+
+
+class SlowServer(Server):
+    """Server with a slow tool to allow time for client disconnect."""
+
+    def __init__(self):
+        super().__init__(SERVER_NAME)
+
+        @self.list_tools()
+        async def handle_list_tools() -> list[Tool]:
+            return [
+                Tool(
+                    name="slow_tool",
+                    description="A tool that takes time to respond",
+                    input_schema={"type": "object", "properties": {}},
+                ),
+            ]
+
+        @self.call_tool()
+        async def handle_call_tool(name: str, arguments: dict) -> list:
+            if name == "slow_tool":
+                await anyio.sleep(10)
+                return [{"type": "text", "text": "done"}]
+            raise ValueError(f"Unknown tool: {name}")
+
+
+def create_app() -> Starlette:
+    """Create a Starlette application for testing."""
+    server = SlowServer()
+    session_manager = StreamableHTTPSessionManager(
+        app=server,
+        json_response=True,
+        stateless=True,
+    )
+
+    @asynccontextmanager
+    async def lifespan(app: Starlette) -> AsyncGenerator[None, None]:
+        async with session_manager.run():
+            yield
+
+    routes = [Mount("/", app=session_manager.handle_request)]
+    return Starlette(routes=routes, lifespan=lifespan)
+
+
+class ServerThread(threading.Thread):
+    """Thread that runs the ASGI application lifespan."""
+
+    def __init__(self, app: Starlette):
+        super().__init__(daemon=True)
+        self.app = app
+        self._stop_event = threading.Event()
+
+    def run(self) -> None:
+        async def run_lifespan():
+            lifespan_context = getattr(self.app.router, "lifespan_context", None)
+            assert lifespan_context is not None
+            async with lifespan_context(self.app):
+                while not self._stop_event.is_set():
+                    await anyio.sleep(0.1)
+
+        anyio.run(run_lifespan)
+
+    def stop(self) -> None:
+        self._stop_event.set()
+
+
+@pytest.mark.anyio
+async def test_client_disconnect_does_not_produce_500(caplog: pytest.LogCaptureFixture):
+    """Client disconnect should not produce HTTP 500 or ERROR log entries.
+
+    Regression test for issue #1648: when a client disconnects mid-request,
+    the server was catching the exception with a broad `except Exception` handler,
+    logging it as ERROR, and returning HTTP 500.
+    """
+    app = create_app()
+    server_thread = ServerThread(app)
+    server_thread.start()
+
+    try:
+        await anyio.sleep(0.2)
+
+        with caplog.at_level(logging.DEBUG):
+            async with httpx.AsyncClient(
+                transport=httpx.ASGITransport(app=app),
+                base_url="http://testserver",
+                timeout=1.0,
+            ) as client:
+                # Send a tool call that will take a long time, client will timeout
+                try:
+                    await client.post(
+                        "/",
+                        json={
+                            "jsonrpc": "2.0",
+                            "method": "tools/call",
+                            "id": "call-1",
+                            "params": {"name": "slow_tool", "arguments": {}},
+                        },
+                        headers={
+                            "Accept": "application/json, text/event-stream",
+                            "Content-Type": "application/json",
+                        },
+                    )
+                except (httpx.ReadTimeout, httpx.ReadError):
+                    pass  # Expected - client timed out
+
+        # Wait briefly for any async error logging to complete
+        await anyio.sleep(0.1)
+
+        # Verify no ERROR-level log entries about handling POST requests
+        error_records = [r for r in caplog.records if r.levelno >= logging.ERROR and "POST" in r.getMessage()]
+        assert not error_records, (
+            f"Server logged ERROR for client disconnect: {[r.getMessage() for r in error_records]}"
+        )
+    finally:
+        server_thread.stop()
+        server_thread.join(timeout=2)
+
+
+@pytest.mark.anyio
+async def test_server_healthy_after_client_disconnect():
+    """Server should remain healthy and accept new requests after a client disconnects."""
+    app = create_app()
+    server_thread = ServerThread(app)
+    server_thread.start()
+
+    try:
+        await anyio.sleep(0.2)
+
+        async with httpx.AsyncClient(
+            transport=httpx.ASGITransport(app=app),
+            base_url="http://testserver",
+            timeout=1.0,
+        ) as client:
+            # First request - will timeout (simulating client disconnect)
+            try:
+                await client.post(
+                    "/",
+                    json={
+                        "jsonrpc": "2.0",
+                        "method": "tools/call",
+                        "id": "call-timeout",
+                        "params": {"name": "slow_tool", "arguments": {}},
+                    },
+                    headers={
+                        "Accept": "application/json, text/event-stream",
+                        "Content-Type": "application/json",
+                    },
+                )
+            except (httpx.ReadTimeout, httpx.ReadError):
+                pass  # Expected - client timed out
+
+        # Create a new client for the second request
+        async with httpx.AsyncClient(
+            transport=httpx.ASGITransport(app=app),
+            base_url="http://testserver",
+            timeout=5.0,
+        ) as client:
+            # Second request - should succeed (server still healthy)
+            response = await client.post(
+                "/",
+                json={
+                    "jsonrpc": "2.0",
+                    "method": "initialize",
+                    "id": "init-after-disconnect",
+                    "params": {
+                        "clientInfo": {"name": "test-client", "version": "1.0"},
+                        "protocolVersion": "2025-03-26",
+                        "capabilities": {},
+                    },
+                },
+                headers={
+                    "Accept": "application/json, text/event-stream",
+                    "Content-Type": "application/json",
+                },
+            )
+            assert response.status_code == 200
+    finally:
+        server_thread.stop()
+        server_thread.join(timeout=2)