cppalliance · wpak-ai · May 27, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Pydantic boundary schemas at GitHub, Slack, and Discord ingestion (`api_schemas.py` per app; Discord ChatExporter uses `staging_schema.py`); fetchers validate with `model_validate()`; services accept typed payloads; `classify_failure` maps validation errors to `VALIDATION`.
 - `core` — shared utilities, collector base classes, and cross-cutting operations (e.g. GitHub, Slack, files, markdown).
 - `boost_collector_runner` — YAML-driven schedules, Celery tasks, and `run_scheduled_collectors` management command.
 - `github_activity_tracker` — GitHub repos, commits, issues, and related activity.

@@ -263,4 +263,32 @@ def classify_failure(exc: BaseException) -> CollectorFailureCategory:
         # Often validation-ish in collectors
         return CollectorFailureCategory.VALIDATION
 
+    try:
+        from pydantic import ValidationError as PydanticValidationError
+    except ImportError:
+        PydanticValidationError = ()  # type: ignore[misc, assignment]
+    if PydanticValidationError and isinstance(exc, PydanticValidationError):
+        return CollectorFailureCategory.VALIDATION
+
+    for mod_name, exc_name in (
+        ("github_activity_tracker.api_schemas", "GitHubApiValidationError"),
+        ("cppa_slack_tracker.api_schemas", "SlackApiValidationError"),
+        ("discord_activity_tracker.staging_schema", "StagingValidationError"),
+        (
+            "discord_activity_tracker.api_schemas",
+            "DiscordLiveSyncValidationError",
+        ),
+    ):
+        try:
+            import importlib
+
+            mod = importlib.import_module(mod_name)
+            app_exc = getattr(mod, exc_name, None)
+            if isinstance(app_exc, type) and isinstance(exc, app_exc):
+                return CollectorFailureCategory.VALIDATION
+        except ImportError:
+            continue
+        except Exception:
+            continue
+
     return CollectorFailureCategory.UNKNOWN
@@ -301,3 +301,36 @@ def fake_import(name, globals=None, locals=None, fromlist=(), level=0):
 
     monkeypatch.setattr(builtins, "__import__", fake_import)
     assert classify_failure(CommandError("x")) is CollectorFailureCategory.UNKNOWN
+
+
+def test_classify_pydantic_validation_error():
+    pydantic = pytest.importorskip("pydantic")
+    from pydantic import BaseModel
+
+    class M(BaseModel):
+        x: int
+
+    try:
+        M.model_validate({"x": "not-int"})
+    except pydantic.ValidationError as exc:
+        assert classify_failure(exc) is CollectorFailureCategory.VALIDATION
+    else:
+        pytest.fail("expected ValidationError")
+
+
+def test_classify_github_api_validation_error():
+    from github_activity_tracker.api_schemas import GitHubApiValidationError
+
+    assert (
+        classify_failure(GitHubApiValidationError("bad issue"))
+        is CollectorFailureCategory.VALIDATION
+    )
+
+
+def test_classify_slack_api_validation_error():
+    from cppa_slack_tracker.api_schemas import SlackApiValidationError
+
+    assert (
+        classify_failure(SlackApiValidationError("bad slack"))
+        is CollectorFailureCategory.VALIDATION
+    )
@@ -0,0 +1,166 @@
+"""Pydantic models for Slack Web API payloads at ingestion boundaries."""
+
+from __future__ import annotations
+
+from typing import Any, NoReturn
+
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    ValidationError,
+    model_validator,
+)
+
+
+class SlackApiValidationError(ValueError):
+    """Slack API payload failed Pydantic validation.
+
+    A ``ValueError`` subclass, so ``except ValueError`` handlers also catch it.
+    Also raised by this module when a payload is not a dict at all.
+    """
+
+
+class SlackProfilePayload(BaseModel):
+    """Profile sub-object of a Slack user payload.
+
+    Every declared field is optional and defaults to ``None``. Keys that are
+    not declared here are accepted rather than rejected (``extra="allow"``).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    email: str | None = None
+    image_72: str | None = None
+    display_name: str | None = None
+
+
+class SlackTopicPurpose(BaseModel):
+    """Shape used for a channel's ``purpose`` and ``topic`` sub-objects.
+
+    Only ``value`` is declared (empty string when absent); undeclared keys
+    are accepted (``extra="allow"``).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    value: str = ""
+
+
+class SlackUserPayload(BaseModel):
+    """Slack user payload as validated at the ingestion boundary.
+
+    ``id`` is the only required field and must be a non-empty string; the
+    remaining fields fall back to the defaults below. Undeclared keys are
+    accepted (``extra="allow"``).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    id: str = Field(min_length=1)
+    name: str = ""
+    real_name: str = ""
+    # A missing profile becomes an empty SlackProfilePayload, not None.
+    profile: SlackProfilePayload = Field(default_factory=SlackProfilePayload)
+    updated: int | None = None
+    is_bot: bool = False
+
+
+class SlackTeamPayload(BaseModel):
+    """Internal shape: team_id + team_name (from API id + name)."""
+
+    model_config = ConfigDict(extra="allow")
+
+    team_id: str = Field(min_length=1)
+    team_name: str = ""
+
+    @model_validator(mode="before")
+    @classmethod
+    def _from_api_team(cls, data: Any) -> Any:
+        if not isinstance(data, dict):
+            return data
+        if "team_id" in data:
+            return data
+        tid = data.get("id") or data.get("team_id") or ""
+        tname = (data.get("name") or data.get("team_name") or tid or "").strip()
+        return {"team_id": str(tid), "team_name": tname or str(tid)}
+
+
+class SlackChannelPayload(BaseModel):
+    """Slack channel payload as validated at the ingestion boundary.
+
+    ``id`` is required and must be a non-empty string; all other fields have
+    defaults. Undeclared keys are accepted (``extra="allow"``).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    id: str = Field(min_length=1)
+    name: str = ""
+    is_channel: bool = False
+    is_private: bool = False
+    is_im: bool = False
+    is_mpim: bool = False
+    # purpose/topic accept the typed model, a plain dict, or None.
+    purpose: SlackTopicPurpose | dict[str, Any] | None = None
+    topic: SlackTopicPurpose | dict[str, Any] | None = None
+    creator: str | None = None
+    created: int | None = None
+    # Defaults to "public_channel" when the payload carries no "type" key.
+    type: str = "public_channel"
+
+
+class SlackMessageEdited(BaseModel):
+    """Shape of a message's ``edited`` sub-object.
+
+    Only the optional ``ts`` is declared; undeclared keys are accepted
+    (``extra="allow"``).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    ts: str | None = None
+
+
+class SlackMessagePayload(BaseModel):
+    """Slack message payload as validated at the ingestion boundary.
+
+    No field is required: ``text`` defaults to the empty string and every
+    other declared field to ``None``. Undeclared keys are accepted
+    (``extra="allow"``).
+    """
+
+    model_config = ConfigDict(extra="allow")
+
+    ts: str | None = None
+    user: str | None = None
+    text: str = ""
+    subtype: str | None = None
+    # Accepts the typed model, a plain dict, or None.
+    edited: SlackMessageEdited | dict[str, Any] | None = None
+    comment: dict[str, Any] | None = None
+    thread_ts: str | None = None
+
+
+def _validation_error(prefix: str, err: ValidationError) -> NoReturn:
+    detail = err.errors()[:5]
+    msg = f"{prefix}: " + "; ".join(
+        f"{e.get('loc', ())}: {e.get('msg', '')}" for e in detail
+    )
+    if len(err.errors()) > 5:
+        msg += f" … ({len(err.errors())} errors total)"
+    raise SlackApiValidationError(msg) from err
+
+
+def _expect_dict(data: Any, prefix: str) -> dict[str, Any]:
+    if not isinstance(data, dict):
+        raise SlackApiValidationError(
+            f"{prefix}: expected object, got {type(data).__name__}"
+        )
+    return data
+
+
+def parse_team(
+    data: dict[str, Any],
+    *,
+    source: str | None = None,
+) -> SlackTeamPayload:
+    prefix = f"Invalid Slack team{f' ({source})' if source else ''}"
+    data = _expect_dict(data, prefix)
+    try:
+        return SlackTeamPayload.model_validate(data)
+    except ValidationError as e:
+        _validation_error(prefix, e)
+
+
+def parse_channel(
+    data: dict[str, Any],
+    *,
+    source: str | None = None,
+) -> SlackChannelPayload:
+    prefix = f"Invalid Slack channel{f' ({source})' if source else ''}"
+    data = _expect_dict(data, prefix)
+    try:
+        return SlackChannelPayload.model_validate(data)
+    except ValidationError as e:
+        _validation_error(prefix, e)
+
+
+def parse_message(
+    data: dict[str, Any],
+    *,
+    source: str | None = None,
+) -> SlackMessagePayload:
+    prefix = f"Invalid Slack message{f' ({source})' if source else ''}"
+    data = _expect_dict(data, prefix)
+    try:
+        return SlackMessagePayload.model_validate(data)
+    except ValidationError as e:
+        _validation_error(prefix, e)
+
+
+def parse_user(
+    data: dict[str, Any],
+    *,
+    source: str | None = None,
+) -> SlackUserPayload:
+    prefix = f"Invalid Slack user{f' ({source})' if source else ''}"
+    data = _expect_dict(data, prefix)
+    try:
+        return SlackUserPayload.model_validate(data)
+    except ValidationError as e:
+        _validation_error(prefix, e)