HugoRCD · HugoRCD · May 4, 2026 · May 4, 2026
diff --git a/.changeset/e2e-adapter-tests.md b/.changeset/e2e-adapter-tests.md
@@ -0,0 +1,9 @@
+---
+"evlog": patch
+---
+
+Add end-to-end adapter tests against the real Axiom, PostHog, Sentry, and Better Stack APIs (`pnpm run test:e2e`). They run nightly via a dedicated GitHub Actions workflow, on pushes to `main` that touch adapter or e2e code, on same-repo PRs labelled `e2e`, and on manual dispatch, so any breaking change on a destination platform is caught within 24 hours instead of in production.
+
+The Axiom suite does a full round-trip when its token has the `query:read` scope — it ingests events tagged with a unique correlation ID, queries them back via APL, and asserts presence and shape. PostHog/Sentry/Better Stack are smoke-tested (their write APIs don't expose a read path).
+
+Pure infra: no user-facing API change, no published code change.
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
@@ -0,0 +1,72 @@
+name: e2e
+
+# E2E tests against real observability platforms (Axiom, PostHog, Sentry,
+# Better Stack). They are NOT run on every PR because:
+#   - secrets are not exposed to fork PRs (security)
+#   - we don't want to spam the destinations on every push
+#   - they are slower than unit tests (real network, ingestion lag)
+#
+# Triggers:
+#   - daily cron (3:00 UTC) — daily health check
+#   - push to main touching adapter / e2e / workflow paths — confirm a
+#     merge didn't break anything
+#   - workflow_dispatch — manual run from the Actions tab
+#   - PR labelled `e2e` — opt-in for adapter changes (only on PRs from
+#     the same repo, never on forks)
+
+on:
+  schedule:
+    - cron: '0 3 * * *'
+  push:
+    branches:
+      - main
+    paths:
+      - 'packages/evlog/src/adapters/**'
+      - 'packages/evlog/src/shared/drain.ts'
+      - 'packages/evlog/src/shared/http.ts'
+      - 'packages/evlog/test/e2e/**'
+      - '.github/workflows/e2e.yml'
+  pull_request:
+    types: [labeled, synchronize, opened]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: e2e-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  e2e:
+    # On pull_request events, run only for same-repo PRs labelled `e2e`; fork PRs are skipped even when labelled.
+    if: |
+      github.event_name != 'pull_request' || (
+        contains(github.event.pull_request.labels.*.name, 'e2e')
+        && github.event.pull_request.head.repo.full_name == github.repository
+      )
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    steps:
+      - uses: actions/checkout@v6
+      - uses: pnpm/action-setup@v6
+        with:
+          version: 10.33.2
+      - uses: actions/setup-node@v6
+        with:
+          node-version: 22
+          cache: 'pnpm'
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+      - name: Prepare
+        run: pnpm run dev:prepare
+      - name: Run e2e tests
+        run: pnpm run test:e2e
+        env:
+          AXIOM_TOKEN: ${{ secrets.AXIOM_TOKEN }}
+          AXIOM_DATASET: ${{ secrets.AXIOM_DATASET }}
+          AXIOM_ORG_ID: ${{ secrets.AXIOM_ORG_ID }}
+          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
+          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
+          SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
+          BETTER_STACK_SOURCE_TOKEN: ${{ secrets.BETTER_STACK_SOURCE_TOKEN }}
+          BETTER_STACK_ENDPOINT: ${{ secrets.BETTER_STACK_ENDPOINT }}
diff --git a/AGENTS.md b/AGENTS.md
@@ -56,12 +56,15 @@ packages/evlog/            Main package
 Tests live in `packages/evlog/test/` and use Vitest.
 
 ```bash
-pnpm run test                                          # full suite
+pnpm run test                                          # full suite (mocked, fast)
 pnpm --filter evlog exec vitest run test/path/to/file  # single test file
+pnpm run test:e2e                                      # adapters vs real endpoints
 ```
 
 Write tests for all new functionality. Run tests before considering any task done.
 
+End-to-end adapter tests (`packages/evlog/test/e2e/*.e2e.ts`) hit the real Axiom/PostHog/Sentry/Better Stack APIs. They skip automatically when env vars aren't set. They run on a daily cron + on push to `main` + on PR labelled `e2e` (`.github/workflows/e2e.yml`).
+
 ## Definition of Done
 
 A task is complete when **all** of the following pass:

diff --git a/package.json b/package.json
@@ -40,6 +40,7 @@
     "lint:fix": "turbo run lint:fix",
     "bench": "turbo run bench --filter='./packages/*'",
     "test": "turbo run test",
+    "test:e2e": "pnpm --filter evlog run test:e2e",
     "typecheck": "turbo run typecheck",
     "automd": "npx automd --config .github/automd.json",
     "changeset": "changeset",

diff --git a/packages/evlog/package.json b/packages/evlog/package.json
@@ -316,6 +316,7 @@
     "test": "vitest run",
     "test:watch": "vitest watch",
     "test:coverage": "vitest run --coverage",
+    "test:e2e": "vitest run --config vitest.e2e.config.ts",
     "typecheck": "echo 'Typecheck handled by build'"
   },
   "devDependencies": {

diff --git a/packages/evlog/test/e2e/README.md b/packages/evlog/test/e2e/README.md
@@ -0,0 +1,61 @@
+# Adapter E2E tests
+
+Real-network tests against the platforms evlog ships an adapter for. They are
+the safety net that catches "the destination quietly changed its API" before
+your users do.
+
+## What runs
+
+| File | Adapter | Mode |
+|---|---|---|
+| `axiom.e2e.ts` | Axiom | Round-trip if token has `query:read`, smoke otherwise |
+| `posthog.e2e.ts` | PostHog (OTLP + events API) | Smoke (write-only API) |
+| `sentry.e2e.ts` | Sentry envelope | Smoke (DSN is write-only) |
+| `better-stack.e2e.ts` | Better Stack | Smoke (source token is write-only) |
+
+Every event is tagged with `e2e: true`, `e2e_run_id`, `e2e_branch`, `e2e_sha`,
+`e2e_test`, `e2e_correlation_id` so you can grep / clean it from the
+destination at any time.
+
+## Run locally
+
+```bash
+pnpm run test:e2e
+```
+
+Tokens are read from the workspace `.env` (already gitignored). Suites whose
+required env vars are missing are skipped with a visible "skipped: missing X"
+label, never silently green.
+
+Round-trip assertions need only `AXIOM_TOKEN` + `AXIOM_DATASET` (plus
+`AXIOM_ORG_ID` when the token is a PAT); the other adapters are smoke-only.
+
+## Run in CI
+
+`.github/workflows/e2e.yml` runs on:
+
+- daily cron (`0 3 * * *` UTC)
+- push to `main` (only when adapter source / e2e tests / workflow change)
+- PR labelled `e2e` (only on same-repo PRs — never forks, for secret safety)
+- manual dispatch
+
+## GitHub secrets
+
+The workflow expects these repo secrets:
+
+- `AXIOM_TOKEN` (PAT with `query:read` for round-trip, ingest token works for smoke)
+- `AXIOM_DATASET`
+- `AXIOM_ORG_ID` (required for PATs)
+- `POSTHOG_API_KEY`
+- `SENTRY_DSN`
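+- `BETTER_STACK_SOURCE_TOKEN`
+
+The workflow also forwards `POSTHOG_HOST` and `BETTER_STACK_ENDPOINT` when those secrets are defined.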
+
+Set them with `gh secret set <NAME> --body '<value>'` or in the repo settings UI.
+
+## Get round-trip on Axiom
+
+The default Axiom ingest token (`xaat-...`) cannot read events back. To
+enable full round-trip assertions, generate a Personal Access Token at
+[app.axiom.co/profile](https://app.axiom.co/profile) with the `query:read`
+scope and use it as `AXIOM_TOKEN`. Without it, the suite degrades to smoke
+tests and prints a warning.
diff --git a/packages/evlog/test/e2e/_shared.ts b/packages/evlog/test/e2e/_shared.ts
@@ -0,0 +1,142 @@
+/**
+ * Shared helpers for end-to-end tests against real observability platforms.
+ *
+ * These tests:
+ *   - skip when the required env vars aren't set
+ *   - tag every event with a unique correlation ID + run/branch/sha so the data
+ *     is identifiable and easy to clean up later
+ *   - exercise the public adapter API (`createXxxDrain` / `sendBatchToXxx`)
+ *     against the real endpoint, with no mocks
+ *
+ * They are NOT included in the default `pnpm run test` run — see
+ * `vitest.e2e.config.ts` and `pnpm run test:e2e`.
+ */
+import { randomUUID } from 'node:crypto'
+import { describe, it } from 'vitest'
+import type { WideEvent } from '../../src/types'
+
+/**
+ * Identifies the run that produced an e2e event.
+ * Populated by `getRunMetadata` from GitHub Actions environment variables,
+ * with `local` placeholders when they are unset.
+ */
+export interface RunMetadata {
+  /** `GITHUB_RUN_ID` when set, otherwise a `local-…` placeholder. */
+  runId: string
+  /** `GITHUB_REF_NAME` when set, otherwise `'local'`. */
+  branch: string
+  /** First 7 characters of `GITHUB_SHA`, or `'local'` when it is unset. */
+  sha: string
+  /** `true` when the `GITHUB_ACTIONS` env var is set to a non-empty value. */
+  ci: boolean
+}
+
+/**
+ * Tag bag attached to every e2e event so test data can be found in the
+ * destination platform. Extends `Record<string, unknown>`, so extra keys
+ * are allowed alongside the fixed ones below.
+ */
+export interface E2ETags extends Record<string, unknown> {
+  /** Constant marker identifying the event as e2e test data. */
+  e2e: true
+  /** Copied from `RunMetadata.runId`. */
+  e2e_run_id: string
+  /** Copied from `RunMetadata.branch`. */
+  e2e_branch: string
+  /** Copied from `RunMetadata.sha`. */
+  e2e_sha: string
+  /** Name of the test that emitted the event. */
+  e2e_test: string
+  /** Random UUID generated for each `makeTags` call. */
+  e2e_correlation_id: string
+}
+
+/**
+ * Fallback run ID used when `GITHUB_RUN_ID` is unset. It is computed once,
+ * when this module is evaluated, so every event tagged through this module
+ * instance shares the same `e2e_run_id` instead of getting a new
+ * `local-<ms>` value on each call.
+ */
+const localRunId = `local-${Date.now()}`
+
+/**
+ * Describe the current run from GitHub Actions environment variables.
+ *
+ * @returns Run ID, branch and short SHA (first 7 characters), each falling
+ *   back to a `local` placeholder when the matching variable is unset, plus
+ *   a `ci` flag that is `true` when `GITHUB_ACTIONS` is set and non-empty.
+ */
+export function getRunMetadata(): RunMetadata {
+  const { GITHUB_ACTIONS, GITHUB_RUN_ID, GITHUB_REF_NAME, GITHUB_SHA } = process.env
+  return {
+    runId: GITHUB_RUN_ID ?? localRunId,
+    branch: GITHUB_REF_NAME ?? 'local',
+    sha: (GITHUB_SHA ?? 'local').slice(0, 7),
+    ci: Boolean(GITHUB_ACTIONS),
+  }
+}
+
+/**
+ * Produce the tag set attached to one e2e event.
+ *
+ * A fresh UUID is generated on every call, so `e2e_correlation_id` pinpoints
+ * a single emitted event in the destination platform.
+ *
+ * @param testName Stored verbatim as `e2e_test`.
+ */
+export function makeTags(testName: string): E2ETags {
+  const { runId, branch, sha } = getRunMetadata()
+  const tags: E2ETags = {
+    e2e: true,
+    e2e_run_id: runId,
+    e2e_branch: branch,
+    e2e_sha: sha,
+    e2e_test: testName,
+    e2e_correlation_id: randomUUID(),
+  }
+  return tags
+}
+
+/**
+ * Create a small wide event for an e2e test.
+ *
+ * Properties are applied in this order, later ones winning: the fixed
+ * defaults (`timestamp`, `level`, `service`, `environment`), then the e2e
+ * tags from `makeTags`, then any caller-supplied `overrides`.
+ *
+ * @param testName Forwarded to `makeTags`.
+ * @param overrides Optional event fields spread last over the result.
+ */
+export function makeEvent(
+  testName: string,
+  overrides: Partial<WideEvent> = {},
+): WideEvent & E2ETags {
+  const eventTags = makeTags(testName)
+  const event: WideEvent & E2ETags = {
+    timestamp: new Date().toISOString(),
+    level: 'info',
+    service: 'evlog-e2e',
+    environment: 'test',
+    ...eventTags,
+    ...overrides,
+  }
+  return event
+}
+
+/**
+ * Register a suite that only runs when every listed env var is set to a
+ * non-empty value.
+ *
+ * When any are missing, the suite is registered through `describe.skip` with
+ * the missing names appended to its title, so a missing token in CI is
+ * visible in the logs instead of silently green.
+ *
+ * @param name Suite title.
+ * @param envVars Names of the environment variables the suite needs.
+ * @param fn Suite body, passed to `describe` / `describe.skip` unchanged.
+ */
+export function describeIfEnv(
+  name: string,
+  envVars: string[],
+  fn: () => void,
+): void {
+  const unset: string[] = []
+  for (const key of envVars) {
+    if (!process.env[key]) unset.push(key)
+  }
+  if (unset.length === 0) {
+    describe(name, fn)
+    return
+  }
+  describe.skip(`${name} (skipped: missing ${unset.join(', ')})`, fn)
+}
+
+/**
+ * Poll a predicate until it returns a truthy value or the timeout elapses.
+ * Used by adapters with a read API (Axiom) to wait for ingestion lag.
+ *
+ * The predicate is always invoked at least once, even when `timeoutMs` is
+ * zero or negative. Between attempts the wait is `intervalMs`, clamped to
+ * the time left before the deadline, so the call never sleeps past the
+ * timeout and a final attempt is made at the deadline. A predicate that
+ * throws is treated as "not ready yet"; the most recent error is included
+ * in the timeout message.
+ *
+ * @param predicate Async check; resolve with a truthy value to stop polling.
+ * @param options `timeoutMs` total budget, `intervalMs` pause between
+ *   attempts, `label` prefix used in the timeout error.
+ * @returns The first truthy value the predicate resolves with.
+ * @throws Error when no truthy value was produced before the deadline.
+ */
+export async function pollUntil<T>(
+  predicate: () => Promise<T | null | undefined>,
+  options: { timeoutMs: number, intervalMs: number, label: string },
+): Promise<T> {
+  const { timeoutMs, intervalMs, label } = options
+  const deadline = Date.now() + timeoutMs
+  let lastError: unknown
+  let attempts = 0
+  while (true) {
+    attempts += 1
+    try {
+      const result = await predicate()
+      if (result) return result
+    } catch (error) {
+      // Swallow and retry: transient failures are expected while the
+      // destination is still ingesting.
+      lastError = error
+    }
+    const remainingMs = deadline - Date.now()
+    if (remainingMs <= 0) break
+    // Never sleep past the deadline.
+    const waitMs = Math.min(intervalMs, remainingMs)
+    await new Promise(resolve => setTimeout(resolve, waitMs))
+  }
+  throw new Error(
+    `[${label}] pollUntil timed out after ${timeoutMs}ms (${attempts} attempts)${
+      lastError ? `; last error: ${(lastError as { message?: unknown }).message ?? lastError}` : ''}`,
+  )
+}
+
+/**
+ * Register an `it` test whose body receives a freshly generated UUID.
+ *
+ * If the body rejects, the UUID is appended to the error message before the
+ * error is rethrown, so a failing run can be grepped for in the destination
+ * platform. Non-`Error` rejections are wrapped in an `Error` first.
+ *
+ * @param name Test title.
+ * @param fn Test body, called with the correlation ID.
+ * @param timeoutMs Optional timeout passed to `it` as its third argument.
+ */
+export function itWithCorrelationId(
+  name: string,
+  fn: (correlationId: string) => Promise<void>,
+  timeoutMs?: number,
+): void {
+  const runWithId = async (): Promise<void> => {
+    const id = randomUUID()
+    try {
+      await fn(id)
+    } catch (caught) {
+      let failure: Error
+      if (caught instanceof Error) {
+        failure = caught
+      } else {
+        failure = new Error(String(caught))
+      }
+      failure.message = `${failure.message}\n   ↳ correlation_id: ${id}`
+      throw failure
+    }
+  }
+  it(name, runWithId, timeoutMs)
+}