Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

Comment thread
jonathanMLDev marked this conversation as resolved.
### Added

- Pydantic boundary schemas at GitHub, Slack, and Discord ingestion (`api_schemas.py` per app; Discord ChatExporter uses `staging_schema.py`); fetchers validate with `model_validate()`; services accept typed payloads; `classify_failure` maps validation errors to `VALIDATION`.
- `core` — shared utilities, collector base classes, and cross-cutting operations (e.g. GitHub, Slack, files, markdown).
- `boost_collector_runner` — YAML-driven schedules, Celery tasks, and `run_scheduled_collectors` management command.
- `github_activity_tracker` — GitHub repos, commits, issues, and related activity.
Expand Down
28 changes: 28 additions & 0 deletions core/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,4 +263,32 @@ def classify_failure(exc: BaseException) -> CollectorFailureCategory:
# Often validation-ish in collectors
return CollectorFailureCategory.VALIDATION

try:
from pydantic import ValidationError as PydanticValidationError
except ImportError:
PydanticValidationError = () # type: ignore[misc, assignment]
if PydanticValidationError and isinstance(exc, PydanticValidationError):
return CollectorFailureCategory.VALIDATION

for mod_name, exc_name in (
("github_activity_tracker.api_schemas", "GitHubApiValidationError"),
("cppa_slack_tracker.api_schemas", "SlackApiValidationError"),
("discord_activity_tracker.staging_schema", "StagingValidationError"),
(
"discord_activity_tracker.api_schemas",
"DiscordLiveSyncValidationError",
),
):
try:
import importlib

mod = importlib.import_module(mod_name)
app_exc = getattr(mod, exc_name, None)
if isinstance(app_exc, type) and isinstance(exc, app_exc):
return CollectorFailureCategory.VALIDATION
except ImportError:
continue
Comment thread
jonathanMLDev marked this conversation as resolved.
except Exception:
continue

return CollectorFailureCategory.UNKNOWN
33 changes: 33 additions & 0 deletions core/tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,36 @@ def fake_import(name, globals=None, locals=None, fromlist=(), level=0):

monkeypatch.setattr(builtins, "__import__", fake_import)
assert classify_failure(CommandError("x")) is CollectorFailureCategory.UNKNOWN


def test_classify_pydantic_validation_error():
    """A raw pydantic ``ValidationError`` is classified as VALIDATION."""
    pydantic = pytest.importorskip("pydantic")

    class M(pydantic.BaseModel):
        x: int

    # Capture the exception first so the classification assert runs outside
    # the try block.
    caught = None
    try:
        M.model_validate({"x": "not-int"})
    except pydantic.ValidationError as exc:
        caught = exc

    if caught is None:
        pytest.fail("expected ValidationError")
    assert classify_failure(caught) is CollectorFailureCategory.VALIDATION


def test_classify_github_api_validation_error():
    """The GitHub boundary validation error is classified as VALIDATION."""
    from github_activity_tracker.api_schemas import GitHubApiValidationError

    error = GitHubApiValidationError("bad issue")
    category = classify_failure(error)
    assert category is CollectorFailureCategory.VALIDATION


def test_classify_slack_api_validation_error():
    """The Slack boundary validation error is classified as VALIDATION."""
    from cppa_slack_tracker.api_schemas import SlackApiValidationError

    error = SlackApiValidationError("bad slack")
    category = classify_failure(error)
    assert category is CollectorFailureCategory.VALIDATION
166 changes: 166 additions & 0 deletions cppa_slack_tracker/api_schemas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
"""Pydantic models for Slack Web API payloads at ingestion boundaries."""

from __future__ import annotations

from typing import Any, NoReturn

from pydantic import (
BaseModel,
ConfigDict,
Field,
ValidationError,
model_validator,
)


class SlackApiValidationError(ValueError):
    """Slack API payload failed Pydantic validation.

    Raised by ``_expect_dict`` when the payload is not a dict and by
    ``_validation_error`` when a pydantic ``ValidationError`` is converted.
    Subclasses ``ValueError``, so ``except ValueError`` handlers also catch it.
    """


class SlackProfilePayload(BaseModel):
    """Profile sub-object of a Slack user payload (see ``SlackUserPayload.profile``).

    Every declared field is optional and defaults to ``None``.
    """

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    email: str | None = None
    # presumably the URL of the 72px avatar image — confirm against the Slack API
    image_72: str | None = None
    display_name: str | None = None


class SlackTopicPurpose(BaseModel):
    """Shape used for the ``purpose`` and ``topic`` fields of ``SlackChannelPayload``.

    Only ``value`` is declared; it defaults to the empty string.
    """

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    value: str = ""


class SlackUserPayload(BaseModel):
    """Slack user object as validated at the ingestion boundary.

    ``id`` is the only required field and must be a non-empty string; all
    other declared fields have defaults.
    """

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    id: str = Field(min_length=1)
    name: str = ""
    real_name: str = ""
    # A missing profile becomes an empty SlackProfilePayload rather than None.
    profile: SlackProfilePayload = Field(default_factory=SlackProfilePayload)
    # presumably a Unix timestamp of the last profile update — confirm
    updated: int | None = None
    is_bot: bool = False


class SlackTeamPayload(BaseModel):
    """Internal shape: team_id + team_name (from API id + name).

    Input that already carries a ``team_id`` key is validated as-is. Any other
    dict is rewritten by ``_from_api_team`` into exactly ``team_id`` and
    ``team_name`` before field validation; other keys of such a dict are not
    carried over. ``team_id`` must be a non-empty string.
    """

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    team_id: str = Field(min_length=1)
    team_name: str = ""

    @model_validator(mode="before")
    @classmethod
    def _from_api_team(cls, data: Any) -> Any:
        """Map an API-style ``{"id", "name"}`` dict onto the internal field names.

        Args:
            data: Raw validator input. Non-dict values and dicts that already
                contain ``team_id`` are returned untouched.

        Returns:
            Either ``data`` unchanged, or a new dict with ``team_id`` and
            ``team_name``. A missing or blank name falls back to the id.
        """
        if not isinstance(data, dict) or "team_id" in data:
            return data

        # "team_id" is known to be absent here, so only "id" can supply the id.
        raw_id = data.get("id") or ""
        team_id = str(raw_id)

        raw_name = data.get("name") or data.get("team_name") or raw_id or ""
        # Coerce before stripping: a non-string name/id (e.g. a numeric id with
        # no name) would otherwise fail on .strip() with AttributeError
        # instead of being handled as payload data.
        team_name = str(raw_name).strip()

        return {"team_id": team_id, "team_name": team_name or team_id}


class SlackChannelPayload(BaseModel):
    """Slack channel/conversation object as validated at the ingestion boundary.

    ``id`` is the only required field and must be a non-empty string; all
    other declared fields have defaults.
    """

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    id: str = Field(min_length=1)
    name: str = ""
    # Conversation-kind flags; each defaults to False when absent.
    is_channel: bool = False
    is_private: bool = False
    is_im: bool = False
    is_mpim: bool = False
    # purpose/topic may be a SlackTopicPurpose, a plain dict, or None.
    purpose: SlackTopicPurpose | dict[str, Any] | None = None
    topic: SlackTopicPurpose | dict[str, Any] | None = None
    # presumably the user id of the channel creator — confirm
    creator: str | None = None
    # presumably a Unix timestamp of channel creation — confirm
    created: int | None = None
    # Defaults to "public_channel" when the payload carries no type.
    type: str = "public_channel"


class SlackMessageEdited(BaseModel):
    """Shape of the ``edited`` field of ``SlackMessagePayload``; only ``ts`` is declared."""

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    # presumably the Slack timestamp of the edit — confirm
    ts: str | None = None


class SlackMessagePayload(BaseModel):
    """Slack message object as validated at the ingestion boundary.

    No field is required: ``text`` defaults to the empty string and every
    other declared field defaults to ``None``.
    """

    # Undeclared keys are accepted instead of failing validation.
    model_config = ConfigDict(extra="allow")

    ts: str | None = None
    user: str | None = None
    text: str = ""
    subtype: str | None = None
    # May be a SlackMessageEdited, a plain dict, or None.
    edited: SlackMessageEdited | dict[str, Any] | None = None
    comment: dict[str, Any] | None = None
    thread_ts: str | None = None


def _validation_error(prefix: str, err: ValidationError) -> NoReturn:
    """Convert a pydantic ``ValidationError`` into ``SlackApiValidationError``.

    The message is ``prefix`` followed by the location and message of at most
    the first five errors; when there are more, the total count is appended.

    Args:
        prefix: Text placed at the start of the message.
        err: The pydantic error being converted; kept as ``__cause__``.

    Raises:
        SlackApiValidationError: Always.
    """
    # errors() builds a fresh list on every call, so compute it once.
    errors = err.errors()
    msg = f"{prefix}: " + "; ".join(
        f"{e.get('loc', ())}: {e.get('msg', '')}" for e in errors[:5]
    )
    if len(errors) > 5:
        msg += f" … ({len(errors)} errors total)"
    raise SlackApiValidationError(msg) from err


def _expect_dict(data: Any, prefix: str) -> dict[str, Any]:
    """Return ``data`` unchanged when it is a dict.

    Raises:
        SlackApiValidationError: If ``data`` is not a dict; the message names
            the actual type after ``prefix``.
    """
    if isinstance(data, dict):
        return data
    raise SlackApiValidationError(
        f"{prefix}: expected object, got {type(data).__name__}"
    )


def parse_team(
    data: dict[str, Any],
    *,
    source: str | None = None,
) -> SlackTeamPayload:
    """Validate a raw Slack team object into a ``SlackTeamPayload``.

    Args:
        data: Raw team object; anything other than a dict is rejected.
        source: Optional label appended in parentheses to the error prefix.

    Raises:
        SlackApiValidationError: If ``data`` is not a dict or fails validation.
    """
    prefix = f"Invalid Slack team{f' ({source})' if source else ''}"
    raw = _expect_dict(data, prefix)
    try:
        team = SlackTeamPayload.model_validate(raw)
    except ValidationError as exc:
        _validation_error(prefix, exc)
    return team


def parse_channel(
    data: dict[str, Any],
    *,
    source: str | None = None,
) -> SlackChannelPayload:
    """Validate a raw Slack channel object into a ``SlackChannelPayload``.

    Args:
        data: Raw channel object; anything other than a dict is rejected.
        source: Optional label appended in parentheses to the error prefix.

    Raises:
        SlackApiValidationError: If ``data`` is not a dict or fails validation.
    """
    prefix = f"Invalid Slack channel{f' ({source})' if source else ''}"
    raw = _expect_dict(data, prefix)
    try:
        channel = SlackChannelPayload.model_validate(raw)
    except ValidationError as exc:
        _validation_error(prefix, exc)
    return channel


def parse_message(
    data: dict[str, Any],
    *,
    source: str | None = None,
) -> SlackMessagePayload:
    """Validate a raw Slack message object into a ``SlackMessagePayload``.

    Args:
        data: Raw message object; anything other than a dict is rejected.
        source: Optional label appended in parentheses to the error prefix.

    Raises:
        SlackApiValidationError: If ``data`` is not a dict or fails validation.
    """
    prefix = f"Invalid Slack message{f' ({source})' if source else ''}"
    raw = _expect_dict(data, prefix)
    try:
        message = SlackMessagePayload.model_validate(raw)
    except ValidationError as exc:
        _validation_error(prefix, exc)
    return message


def parse_user(
    data: dict[str, Any],
    *,
    source: str | None = None,
) -> SlackUserPayload:
    """Validate a raw Slack user object into a ``SlackUserPayload``.

    Args:
        data: Raw user object; anything other than a dict is rejected.
        source: Optional label appended in parentheses to the error prefix.

    Raises:
        SlackApiValidationError: If ``data`` is not a dict or fails validation.
    """
    prefix = f"Invalid Slack user{f' ({source})' if source else ''}"
    raw = _expect_dict(data, prefix)
    try:
        user = SlackUserPayload.model_validate(raw)
    except ValidationError as exc:
        _validation_error(prefix, exc)
    return user
Loading
Loading