Skip to content

Rewrite FastAPI instrumentor middleware stack to be failsafe #3664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
50120ce
rewrite FastAPIInstrumentor:build_middleware_stack to become failsafe
outergod Jul 30, 2025
8a1a39e
add test cases for FastAPI failsafe handling
outergod Jul 30, 2025
2150e5c
add CHANGELOG entry
outergod Jul 30, 2025
2faf005
remove unused import
outergod Jul 30, 2025
5665a0a
[lint] don't return from failsafe wrapper
outergod Jul 30, 2025
425a7f3
[lint] allow broad exceptions
outergod Jul 30, 2025
6c48703
[lint] more allowing
outergod Jul 30, 2025
55e9c43
record FastAPI hook exceptions in active span
outergod Aug 14, 2025
a7a4949
remove comment
outergod Aug 14, 2025
5632d1b
properly deal with hooks not being set
outergod Aug 14, 2025
0203e4b
add custom FastAPI exception recording
outergod Aug 14, 2025
4c17f00
move failsafe hook handling down to OpenTelemetryMiddleware
outergod Aug 14, 2025
56139a3
shut up pylint
outergod Aug 14, 2025
843d8c7
optimize failsafe to check for `None` only once
outergod Aug 18, 2025
c2c326e
remove confusing comment and simplify wrapper logic
outergod Aug 18, 2025
3a19941
add clarifying comment
outergod Aug 18, 2025
4e29a76
test proper exception / status code recording
outergod Aug 18, 2025
1902548
add HTTP status code check
outergod Aug 18, 2025
fce5164
test HTTP status on the exception recording span
outergod Aug 18, 2025
3cc31d0
improve test by removing TypeError
outergod Aug 18, 2025
217763b
rectify comment/explanation on inner middleware for exception handling
outergod Aug 18, 2025
b4cdc0e
minor typo
outergod Aug 18, 2025
8fe25f3
Merge remote-tracking branch 'upstream/main' into fix/GH-3642-fastapi…
outergod Aug 18, 2025
2739fbb
move ExceptionHandlingMiddleware as the outermost inner middleware
outergod Aug 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- `opentelemetry-instrumentation-fastapi`: Implement failsafe middleware stack.
([#3664](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3664))
- `opentelemetry-instrumentation`: Avoid calls to `context.detach` with `None` token.
([#3673](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3673))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def client_response_hook(span: Span, scope: Scope, message: dict[str, Any]):
HTTP_SERVER_REQUEST_DURATION,
)
from opentelemetry.semconv.trace import SpanAttributes
from opentelemetry.trace import set_span_in_context
from opentelemetry.trace import Span, set_span_in_context
from opentelemetry.util.http import (
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS,
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST,
Expand Down Expand Up @@ -646,9 +646,23 @@ def __init__(
self.default_span_details = (
default_span_details or get_default_span_details
)
self.server_request_hook = server_request_hook
self.client_request_hook = client_request_hook
self.client_response_hook = client_response_hook

def failsafe(func):
    """Wrap a hook so that an exception it raises never propagates.

    Args:
        func: The hook callable to protect, or ``None`` when no hook
            was configured.

    Returns:
        ``None`` if ``func`` is ``None``; otherwise a wrapper that takes
        the span as its first positional argument, forwards all
        arguments to ``func`` and always returns ``None`` (the hook's
        own return value is discarded).
    """
    if func is not None:

        @wraps(func)
        def guarded(span: Span, *hook_args, **hook_kwargs):
            try:
                func(span, *hook_args, **hook_kwargs)
            except Exception as error:  # pylint: disable=broad-exception-caught
                # A failing hook is recorded on the span it was given
                # rather than being allowed to escape to the caller.
                span.record_exception(error)

        return guarded

    # The "is this hook set?" check happens once, at wrap time.
    return None

self.server_request_hook = failsafe(server_request_hook)
self.client_request_hook = failsafe(client_request_hook)
self.client_response_hook = failsafe(client_response_hook)
self.content_length_header = None
self._sem_conv_opt_in_mode = sem_conv_opt_in_mode

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A
from starlette.applications import Starlette
from starlette.middleware.errors import ServerErrorMiddleware
from starlette.routing import Match
from starlette.types import ASGIApp
from starlette.types import ASGIApp, Receive, Scope, Send

from opentelemetry.instrumentation._semconv import (
_get_schema_url,
Expand All @@ -210,7 +210,8 @@ def client_response_hook(span: Span, scope: dict[str, Any], message: dict[str, A
from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.metrics import MeterProvider, get_meter
from opentelemetry.semconv.attributes.http_attributes import HTTP_ROUTE
from opentelemetry.trace import TracerProvider, get_tracer
from opentelemetry.trace import TracerProvider, get_current_span, get_tracer
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.http import (
get_excluded_urls,
parse_excluded_urls,
Expand Down Expand Up @@ -242,7 +243,7 @@ def instrument_app(
http_capture_headers_server_response: list[str] | None = None,
http_capture_headers_sanitize_fields: list[str] | None = None,
exclude_spans: list[Literal["receive", "send"]] | None = None,
):
): # pylint: disable=too-many-locals
"""Instrument an uninstrumented FastAPI application.
Args:
Expand Down Expand Up @@ -289,17 +290,78 @@ def instrument_app(
schema_url=_get_schema_url(sem_conv_opt_in_mode),
)

# Instead of using `app.add_middleware` we monkey patch `build_middleware_stack` to insert our middleware
# as the outermost middleware.
# Otherwise `OpenTelemetryMiddleware` would have unhandled exceptions tearing through it and would not be able
# to faithfully record what is returned to the client since it technically cannot know what `ServerErrorMiddleware` is going to do.

def build_middleware_stack(self: Starlette) -> ASGIApp:
inner_server_error_middleware: ASGIApp = ( # type: ignore
# Define an additional middleware for exception handling that gets
# added as a regular user middleware.
Comment on lines +294 to +295
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Define an additional middleware for exception handling that gets
# added as a regular user middleware.
# Define an additional middleware for exception handling

# Normally, `opentelemetry.trace.use_span` covers the recording of
# exceptions into the active span, but `OpenTelemetryMiddleware`
# ends the span before the exception can be recorded.
class ExceptionHandlerMiddleware:
    """ASGI middleware that records exceptions escaping the wrapped app.

    Any ``Exception`` raised by the wrapped application is recorded on
    the current span, the span status is set to ``ERROR`` with a
    ``"<ExceptionType>: <message>"`` description, and the exception is
    then re-raised unchanged.
    """

    def __init__(self, app):
        """Store the downstream ASGI application to delegate to."""
        self.app = app

    async def __call__(
        self, scope: Scope, receive: Receive, send: Send
    ) -> None:
        """Call the wrapped app, recording any exception before re-raising."""
        try:
            await self.app(scope, receive, send)
        except Exception as error:  # pylint: disable=broad-exception-caught
            active_span = get_current_span()
            active_span.record_exception(error)
            failure = Status(
                status_code=StatusCode.ERROR,
                description=f"{type(error).__name__}: {error}",
            )
            active_span.set_status(failure)
            # Recording must not change what callers further out observe.
            raise

# For every possible use case of error handling, exception
# handling, trace availability in exception handlers and
# automatic exception recording to work, we need to make a
# series of wrapping and re-wrapping middlewares.

# First, grab the original middleware stack from Starlette. It
# comprises a stack of
# `ServerErrorMiddleware` -> [user defined middlewares] -> `ExceptionMiddleware`
inner_server_error_middleware: ServerErrorMiddleware = ( # type: ignore
self._original_build_middleware_stack() # type: ignore
)

if not isinstance(
inner_server_error_middleware, ServerErrorMiddleware
):
# Oops, something changed about how Starlette creates middleware stacks
_logger.error(
"Cannot instrument FastAPI as the expected middleware stack has changed"
)
return inner_server_error_middleware

# We take [user defined middlewares] -> `ExceptionMiddleware`
# out of the outermost `ServerErrorMiddleware` and instead pass
# it to our own `ExceptionHandlerMiddleware`
exception_middleware = ExceptionHandlerMiddleware(
inner_server_error_middleware.app
)

# Now, we create a new `ServerErrorMiddleware` that wraps
# `ExceptionHandlerMiddleware` but otherwise uses the same
# original `handler` and debug setting. The end result is a
# middleware stack that's identical to the original stack except
# all user middlewares are covered by our
# `ExceptionHandlerMiddleware`.
error_middleware = ServerErrorMiddleware(
app=exception_middleware,
handler=inner_server_error_middleware.handler,
debug=inner_server_error_middleware.debug,
)

# Finally, we wrap the stack above in our actual OTEL
# middleware. As a result, an active tracing context exists for
# every use case of user-defined error and exception handlers as
# well as automatic recording of exceptions in active spans.
otel_middleware = OpenTelemetryMiddleware(
inner_server_error_middleware,
error_middleware,
excluded_urls=excluded_urls,
default_span_details=_get_default_span_details,
server_request_hook=server_request_hook,
Expand All @@ -313,23 +375,18 @@ def build_middleware_stack(self: Starlette) -> ASGIApp:
http_capture_headers_sanitize_fields=http_capture_headers_sanitize_fields,
exclude_spans=exclude_spans,
)
# Wrap in an outer layer of ServerErrorMiddleware so that any exceptions raised in OpenTelemetryMiddleware
# are handled.
# This should not happen unless there is a bug in OpenTelemetryMiddleware, but if there is we don't want that
# to impact the user's application just because we wrapped the middlewares in this order.
if isinstance(
inner_server_error_middleware, ServerErrorMiddleware
): # usually true
outer_server_error_middleware = ServerErrorMiddleware(
app=otel_middleware,
)
else:
# Something else seems to have patched things, or maybe Starlette changed.
# Just create a default ServerErrorMiddleware.
outer_server_error_middleware = ServerErrorMiddleware(
app=otel_middleware
)
return outer_server_error_middleware

# Ultimately, wrap everything in another default
# `ServerErrorMiddleware` (w/o user handlers) so that any
# exceptions raised in `OpenTelemetryMiddleware` are handled.
#
# This should not happen unless there is a bug in
# OpenTelemetryMiddleware, but if there is we don't want that to
# impact the user's application just because we wrapped the
# middlewares in this order.
return ServerErrorMiddleware(
app=otel_middleware,
)

app._original_build_middleware_stack = app.build_middleware_stack
app.build_middleware_stack = types.MethodType(
Expand Down
Loading