Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/sentry/grouping/parameterization.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,17 @@ def _pattern(self, experimental: bool = False) -> str:
""",
),
ParameterizationRegex(name="duration", raw_pattern=r"""\b(\d+ms) | (\d+(\.\d+)?s)\b"""),
ParameterizationRegex(
name="ulid",
raw_pattern=r"""
# ULIDs: 26 character Crockford's Base32 strings (case-insensitive)
# Excludes I, L, O, U to avoid ambiguity
# https://github.com/ulid/spec
# Lookaheads require both a letter and a digit to avoid matching
# pure-alpha words or pure-numeric strings
(\b(?!0[xX])(?=[0-9A-HJ-KM-NP-TV-Za-hj-km-np-tv-z]*[A-HJ-KM-NP-TV-Za-hj-km-np-tv-z])(?=[0-9A-HJ-KM-NP-TV-Za-hj-km-np-tv-z]*[0-9])[0-9A-HJ-KM-NP-TV-Za-hj-km-np-tv-z]{26}\b)
""",
),
ParameterizationRegex(
name="hex",
raw_pattern=r"""
Expand Down
10 changes: 10 additions & 0 deletions tests/sentry/grouping/test_parameterization.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ def experimental_parameterizer() -> Parameterizer:
("duration - 1.234s", "1.234s", "<duration>"),
("duration - 10s", "10s", "<duration>"),
("duration - 100.0000s", "100.0000s", "<duration>"),
# ULIDs
("ulid - uppercase", "01ARZ3NDEKTSV4RRFFQ69G5FAV", "<ulid>"),
("ulid - lowercase", "01arz3ndektsv4rrffq69g5fav", "<ulid>"),
(
"ulid - in message",
"Failed to process 01H5V5KBSMQ8E6MTMHKQ8KT3SY",
"Failed to process <ulid>",
),
Comment on lines +79 to +84
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These lines can be removed.

("ulid - not pure alpha 26 chars", "ABCDEFGHJKMNPQRSTVWXYZABCD", "ABCDEFGHJKMNPQRSTVWXYZABCD"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here as on the other one: this input is in fact pure alpha, so the "not pure alpha" label is confusing.

("ulid - contains excluded char I", "01ARZ3NDEKISV4RRFFQ69G5FAV", "01ARZ3NDEKISV4RRFFQ69G5FAV"),
("hex with prefix - lowercase, 4 digits", "0x9af8", "<hex>"),
("hex with prefix - uppercase, 4 digits", "0x9AF8", "<hex>"),
("hex with prefix - lowercase, 8 digits", "0x9af8c3be", "<hex>"),
Expand Down
Loading