docs(pii): describe Presidio as a standalone service, not a sidecar

TheodoreSpeaks · TheodoreSpeaks · commit e7b822a1822d · 2026-07-01T15:39:21.000-07:00
Presidio now runs as its own ECS service (and, in Helm, its own Deployment +
Service) reached over the network via PII_URL — not a sidecar in the app task.
Update README, code comments, env docs, Dockerfiles, and the Helm chart docs to
match, and note the deploy requirement that PII_URL must be reachable.
diff --git a/apps/sim/app/api/guardrails/mask-batch/route.ts b/apps/sim/app/api/guardrails/mask-batch/route.ts
@@ -11,9 +11,10 @@ const logger = createLogger('GuardrailsMaskBatchAPI')
 
 /**
  * Internal batch PII masking. The log-redaction persist path runs in both the
- * Next.js server and the trigger.dev runtime, but the Presidio sidecars live only
- * in the app task — so redaction calls this endpoint server-to-server (internal
- * JWT) to keep Presidio centralized here.
+ * Next.js server and the trigger.dev runtime, but only the app task reaches the
+ * Presidio service (it holds `PII_URL` and has internal-network access) — so
+ * redaction calls this endpoint server-to-server (internal JWT) to keep the
+ * Presidio call centralized here.
  */
 export const POST = withRouteHandler(async (request: NextRequest) => {
   const auth = await checkInternalAuth(request, { requireWorkflowId: false })
@@ -35,7 +36,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => {
     })
     return NextResponse.json({ masked })
   } catch (error) {
-    // An unreachable/misconfigured Presidio sidecar makes maskPIIBatch throw; fail
+    // An unreachable/misconfigured Presidio service makes maskPIIBatch throw; fail
     // loudly here (the caller scrubs to REDACTION_FAILED, so PII is never leaked).
     logger.error('PII batch masking failed', {
       error: getErrorMessage(error),
diff --git a/apps/sim/lib/core/config/env.ts b/apps/sim/lib/core/config/env.ts
@@ -325,8 +325,8 @@ export const env = createEnv({
     PORT:                                  z.number().optional(),                  // Main application port
     INTERNAL_API_BASE_URL:                 z.string().optional(),                  // Optional internal base URL for server-side self-calls; must include protocol if set (e.g., http://sim-app.namespace.svc.cluster.local:3000)
     ALLOWED_ORIGINS:                       z.string().optional(),                  // CORS allowed origins
-    PII_URL:                               z.string().optional(),                  // Presidio PII sidecar base URL serving /analyze + /anonymize (default http://localhost:5001)
-    PII_MASK_CHUNK_CONCURRENCY:            z.coerce.number().int().positive().optional(), // Max in-flight mask-batch requests per redaction (default 4); raise for a scaled Presidio service, lower to 1 for a single sidecar
+    PII_URL:                               z.string().optional(),                  // Presidio PII service base URL serving /analyze + /anonymize (standalone ECS service; default http://localhost:5001 for local dev)
+    PII_MASK_CHUNK_CONCURRENCY:            z.coerce.number().int().positive().optional(), // Max in-flight mask-batch requests per redaction (default 4); raise for a scaled-out Presidio service, lower to 1 for a single instance
 
     // OAuth Integration Credentials - All optional, enables third-party integrations
     GOOGLE_CLIENT_ID:                      z.string().optional(),                  // Google OAuth client ID for Google services
diff --git a/apps/sim/lib/guardrails/README.md b/apps/sim/lib/guardrails/README.md
@@ -19,29 +19,36 @@ For **hallucination detection**, you'll need:
 - A knowledge base with documents
 - An LLM provider API key (or use hosted models)
 
-### PII Detection (Presidio sidecar)
+### PII Detection (Presidio service)
 
-PII detection runs against **one** long-lived Presidio sidecar — a combined service (built from
-`docker/pii.Dockerfile`, source in `apps/pii/server.py`) that constructs a warm `AnalyzerEngine` +
-`AnonymizerEngine` once and exposes both `/analyze` and `/anonymize` (plus `/health`) on a single
-port. In deployment it runs alongside the app container in the same ECS task; locally, build and run
-it:
+PII detection runs against a **standalone Presidio service** — a combined analyzer + anonymizer
+(built from `docker/pii.Dockerfile`, source in `apps/pii/server.py`) that constructs a warm
+`AnalyzerEngine` + `AnonymizerEngine` once and exposes `/analyze`, `/anonymize`, and `/health` on a
+single port. In deployment it is its **own ECS service** (a dedicated task/service, not a sidecar in
+the app task), reached over the network via `PII_URL` and scaled independently of the app. The app
+is a thin HTTP client (`validate_pii.ts`); the trigger.dev runtime goes through the app's internal
+mask-batch route rather than calling Presidio directly. Neither needs Python or a local venv.
+
+Locally, build and run it as a container:
 
 ```bash
 docker build -f docker/pii.Dockerfile -t sim-pii .
 docker run -d -p 5001:5001 sim-pii
 ```
 
-Point the app at it (default shown):
+Point the app at it with `PII_URL`:
 
-```bash
-PII_URL=http://localhost:5001
-```
+- **Local**: `PII_URL=http://localhost:5001` (the default)
+- **Deployed**: `PII_URL` points to the Presidio ECS service's internal endpoint (service-discovery
+  DNS / internal load balancer) — never `localhost`, since the service runs in a separate task
 
 The image bakes in the recognizers itself — a check-digit-validated **VIN** recognizer and
-multi-language NLP models (en/es/it/pl/fi) — so the app is a thin HTTP client (`validate_pii.ts`) with
-no Python or local venv. The redaction language is configured per rule (Data Retention) and defaults
-to English.
+multi-language NLP models (en/es/it/pl/fi). The redaction language is configured per rule (Data
+Retention) and defaults to English.
+
+> **Deploy requirement:** the execution-altering redaction stages (workflow input + block outputs)
+> fail fast and abort a run if the Presidio service is unreachable. Every environment that can run
+> workflows must have a reachable Presidio service at `PII_URL`.
 
 ## Usage
 
@@ -100,7 +107,7 @@ See [Presidio documentation](https://microsoft.github.io/presidio/supported_enti
 - `validate_json.ts` - JSON validation (TypeScript)
 - `validate_regex.ts` - Regex validation (TypeScript)
 - `validate_hallucination.ts` - Hallucination detection with RAG + LLM scoring (TypeScript)
-- `validate_pii.ts` - PII detection client: calls the Presidio sidecar's /analyze + /anonymize (TypeScript)
+- `validate_pii.ts` - PII detection client: calls the Presidio service's /analyze + /anonymize (TypeScript)
 - `pii-entities.ts` - Client-safe PII entity + language catalog (shared by the block and Data Retention)
 - `mask-client.ts` - Internal HTTP client for batch PII masking from the log-redaction persist path
 - `validate.test.ts` - Test suite for JSON and regex validators
diff --git a/apps/sim/lib/guardrails/mask-client.ts b/apps/sim/lib/guardrails/mask-client.ts
@@ -8,19 +8,19 @@ import { chunkIndicesByBudget } from '@/lib/guardrails/pii-batching'
 /**
  * Max in-flight mask-batch requests per call. Each request is a CPU-heavy NER
  * batch, so a single Presidio instance is easily saturated — default 4, raise it
- * via `PII_MASK_CHUNK_CONCURRENCY` for a scaled/load-balanced service, or set 1
- * for a single sidecar. No request timeout: masking a large batch is slow and the
- * (scaled) Presidio service is expected to eventually respond; an unreachable
- * sidecar still rejects fast (connection refused) so the caller scrubs.
+ * via `PII_MASK_CHUNK_CONCURRENCY` for a scaled-out/load-balanced service, or set
+ * 1 for a single instance. No request timeout: masking a large batch is slow and
+ * the (scaled) Presidio service is expected to eventually respond; an unreachable
+ * service still rejects fast (connection refused) so the caller scrubs.
  */
 const CHUNK_CONCURRENCY = env.PII_MASK_CHUNK_CONCURRENCY ?? 4
 
 /**
  * Mask PII across many strings via the internal app-container endpoint.
  *
- * The Presidio sidecars run only in the app task, but the log-redaction persist
- * path also runs inside the trigger.dev runtime — so redaction always routes
- * through HTTP, the same way the guardrails tool does.
+ * Only the app task reaches the Presidio service (it holds `PII_URL`), but the
+ * log-redaction persist path also runs inside the trigger.dev runtime — so
+ * redaction always routes through HTTP, the same way the guardrails tool does.
  * Strings are grouped into byte/count-budgeted chunks (keeping each request far
  * under the 10MB Next body limit) and the chunks are sent with bounded
  * concurrency, so a large payload fans out rather than serializing; order is
diff --git a/apps/sim/lib/guardrails/pii-batching.ts b/apps/sim/lib/guardrails/pii-batching.ts
@@ -1,8 +1,8 @@
 /**
  * Per-request bounds shared by both Presidio hops: the app→route HTTP call
- * (`mask-client`) and the route→sidecar call (`validate_pii`). Keeping a single
+ * (`mask-client`) and the route→service call (`validate_pii`). Keeping a single
  * source of truth ensures every request stays far under the 10MB Next body limit
- * and small enough for one short spaCy NER pass under the sidecar timeout.
+ * and small enough for one short spaCy NER pass per Presidio request.
  */
 
 /** Max UTF-8 bytes of text per Presidio request. ~40× under the 10MB Next limit. */
diff --git a/apps/sim/lib/guardrails/validate_pii.test.ts b/apps/sim/lib/guardrails/validate_pii.test.ts
@@ -27,7 +27,7 @@ function emailSpans(text: string, entities: string[] | undefined): Span[] {
   return idx === -1 ? [] : [{ entity_type: 'EMAIL_ADDRESS', start: idx, end: idx + 7, score: 0.9 }]
 }
 
-describe('validate_pii (Presidio sidecar)', () => {
+describe('validate_pii (Presidio service)', () => {
   let analyzeBodies: Array<{ text: string; language: string; entities?: string[] }>
   let fetchMock: ReturnType<typeof vi.fn>
 
@@ -87,7 +87,7 @@ describe('validate_pii (Presidio sidecar)', () => {
       expect(await maskPIIBatch([''], [])).toEqual([''])
     })
 
-    it('throws on a sidecar failure so the caller can scrub', async () => {
+    it('throws on a service failure so the caller can scrub', async () => {
       fetchMock.mockResolvedValueOnce(new Response('boom', { status: 500 }))
       await expect(maskPIIBatch(['email a@b.com'], [])).rejects.toThrow(/Presidio analyze failed/)
     })
diff --git a/apps/sim/lib/guardrails/validate_pii.ts b/apps/sim/lib/guardrails/validate_pii.ts
@@ -7,14 +7,14 @@ import { chunkIndicesByBudget } from '@/lib/guardrails/pii-batching'
 const logger = createLogger('PIIValidator')
 
 /**
- * Concurrent chunk requests in flight. Each chunk is itself a batched sidecar call
- * (spaCy `nlp.pipe` over many strings), so a small concurrency keeps the single-model
- * sidecar from holding too many parallel docs in memory while still overlapping
- * HTTP/JSON with the next chunk's NER.
+ * Concurrent chunk requests in flight. Each chunk is itself a batched service call
+ * (spaCy `nlp.pipe` over many strings), so a small concurrency keeps a single-model
+ * Presidio instance from holding too many parallel docs in memory while still
+ * overlapping HTTP/JSON with the next chunk's NER.
  */
 const CHUNK_CONCURRENCY = 4
 
-/** Single Presidio sidecar serving both /analyze and /anonymize (VIN is native there). */
+/** Presidio service serving both /analyze and /anonymize (VIN is native there). */
 const PII_URL = env.PII_URL || 'http://localhost:5001'
 
 export interface PIIValidationInput {
@@ -58,7 +58,7 @@ async function analyze(
 ): Promise<AnalyzerSpan[]> {
   const entities = entityTypes.length > 0 ? entityTypes : undefined
 
-  // boundary-raw-fetch: internal call to the Presidio analyzer sidecar over localhost
+  // boundary-raw-fetch: internal call to the Presidio analyzer service via PII_URL
   const response = await fetch(`${PII_URL}/analyze`, {
     method: 'POST',
     headers: { 'content-type': 'application/json' },
@@ -83,7 +83,7 @@ async function analyzeBatch(
 ): Promise<AnalyzerSpan[][]> {
   const entities = entityTypes.length > 0 ? entityTypes : undefined
 
-  // boundary-raw-fetch: internal call to the Presidio analyzer sidecar over localhost
+  // boundary-raw-fetch: internal call to the Presidio analyzer service via PII_URL
   const response = await fetch(`${PII_URL}/analyze_batch`, {
     method: 'POST',
     headers: { 'content-type': 'application/json' },
@@ -110,7 +110,7 @@ interface AnonymizeBatchItem {
 async function anonymizeBatch(items: AnonymizeBatchItem[]): Promise<string[]> {
   if (items.length === 0) return []
 
-  // boundary-raw-fetch: internal call to the Presidio anonymizer sidecar over localhost
+  // boundary-raw-fetch: internal call to the Presidio anonymizer service via PII_URL
   const response = await fetch(`${PII_URL}/anonymize_batch`, {
     method: 'POST',
     headers: { 'content-type': 'application/json' },
@@ -125,13 +125,13 @@ async function anonymizeBatch(items: AnonymizeBatchItem[]): Promise<string[]> {
 }
 
 /**
- * Mask spans via the Presidio anonymizer sidecar. Omitting `anonymizers` uses the
+ * Mask spans via the Presidio anonymizer service. Omitting `anonymizers` uses the
  * default `replace` operator, which yields `<ENTITY_TYPE>`. Throws on failure.
  */
 async function anonymize(text: string, spans: AnalyzerSpan[]): Promise<string> {
   if (spans.length === 0) return text
 
-  // boundary-raw-fetch: internal call to the Presidio anonymizer sidecar over localhost
+  // boundary-raw-fetch: internal call to the Presidio anonymizer service via PII_URL
   const response = await fetch(`${PII_URL}/anonymize`, {
     method: 'POST',
     headers: { 'content-type': 'application/json' },
@@ -146,7 +146,7 @@ async function anonymize(text: string, spans: AnalyzerSpan[]): Promise<string> {
 }
 
 /**
- * Validate text for PII using the Presidio sidecar.
+ * Validate text for PII using the Presidio service.
  *
  * - block: fails validation if any PII is detected
  * - mask: passes and returns masked text with PII replaced by `<ENTITY_TYPE>`
@@ -209,14 +209,14 @@ export async function validatePII(input: PIIValidationInput): Promise<PIIValidat
 }
 
 /**
- * Mask PII across many strings via the Presidio sidecar, preserving input order.
+ * Mask PII across many strings via the Presidio service, preserving input order.
  *
  * Strings are grouped into byte/count-budgeted chunks (see {@link chunkIndicesByBudget}),
  * and each chunk runs one batched `analyze` pass followed by one batched `anonymize`
- * pass over only the strings that actually matched — so the sidecar round-trip count
+ * pass over only the strings that actually matched — so the service round-trip count
  * scales with payload size, not leaf count, and spaCy batches NER via `nlp.pipe`.
  * Chunks run with bounded concurrency. Strings with no detected PII pass through
- * unchanged. Rejects on any sidecar failure (which fails the whole batch) so callers
+ * unchanged. Rejects on any service failure (which fails the whole batch) so callers
  * can apply their own fail-safe (scrub).
  */
 export async function maskPIIBatch(
diff --git a/docker/app.Dockerfile b/docker/app.Dockerfile
@@ -125,8 +125,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/isolated-v
 # apps/sim/lib/execution/sandbox/bundles/build.ts to regenerate.
 COPY --from=builder --chown=nextjs:nodejs /app/apps/sim/lib/execution/sandbox/bundles ./apps/sim/lib/execution/sandbox/bundles
 
-# Guardrails PII runs in dedicated Presidio sidecar containers (analyzer +
-# anonymizer), reached over localhost — no Python/Presidio in this image.
+# Guardrails PII runs in a standalone Presidio service (combined analyzer +
+# anonymizer, docker/pii.Dockerfile), reached over the network via PII_URL —
+# no Python/Presidio in this image.
 
 # Create .next/cache directory with correct ownership
 RUN mkdir -p apps/sim/.next/cache && \
diff --git a/docker/pii.Dockerfile b/docker/pii.Dockerfile
@@ -38,8 +38,8 @@ RUN groupadd -g 1001 pii && \
     chown -R pii:pii /app
 USER pii
 
-# Listen on 5001. In the ECS task all containers share one network namespace
-# (awsvpc) and the app owns 3000, so this sidecar must not use 3000.
+# Listen on 5001. Runs as its own ECS service (separate task), reached via PII_URL;
+# 5001 avoids colliding with the app's 3000 in local/compose runs on one host.
 EXPOSE 5001
 
 # start-period is generous: five large spaCy models load at import before
diff --git a/helm/sim/README.md b/helm/sim/README.md
@@ -48,7 +48,7 @@ Optional components (off by default):
 
 * **`copilot`** — the Sim Copilot service plus its own Postgres StatefulSet.
 * **`ollama`** — local LLM inference, with optional NVIDIA GPU support.
-* **`pii`** — Presidio PII redaction sidecar (analyzer + anonymizer) for the Guardrails PII block and log redaction. See [PII redaction](#pii-redaction).
+* **`pii`** — Presidio PII redaction service (analyzer + anonymizer) for the Guardrails PII block and log redaction. See [PII redaction](#pii-redaction).
 * **`telemetry`** — OpenTelemetry Collector wired to Jaeger / Prometheus / OTLP backends.
 * **`ingress`** — NGINX-style Ingress for the app and realtime services.
 * **`networkPolicy`** — east-west and egress isolation (blocks cloud metadata endpoints by default).
@@ -357,14 +357,14 @@ Requires the Prometheus Operator CRDs. Scrapes `/metrics` on the app and realtim
 
 ## PII redaction
 
-Sim can redact personally identifiable information using a [Presidio](https://microsoft.github.io/presidio/) sidecar (analyzer + anonymizer combined into one image listening on port 5001). Enable it with:
+Sim can redact personally identifiable information using a [Presidio](https://microsoft.github.io/presidio/) service (analyzer + anonymizer combined into one image listening on port 5001). Enable it with:
 
 ```yaml
 pii:
   enabled: true
 ```
 
-When enabled, the chart deploys the sidecar (`<release>-pii` Deployment + Service) and **auto-wires** `PII_URL` on the app to the in-cluster service. The sidecar bundles five large spaCy models (en/es/it/pl/fi, ~2.2GB), so the first start takes ~3 minutes while models load — the `startupProbe` allows for this. Size the `pii.resources` for at least ~4Gi memory.
+When enabled, the chart deploys Presidio as a standalone `<release>-pii` Deployment + Service and **auto-wires** `PII_URL` on the app to the in-cluster service. The service bundles five large spaCy models (en/es/it/pl/fi, ~2.2GB), so the first start takes ~3 minutes while models load — the `startupProbe` allows for this. Size the `pii.resources` for at least ~4Gi memory.
 
 This alone powers the **Guardrails PII block** and on-demand masking. To additionally turn on **automatic log redaction** (the org/workspace data-retention scrub), you must:
 
diff --git a/helm/sim/templates/_helpers.tpl b/helm/sim/templates/_helpers.tpl
@@ -447,7 +447,7 @@ Ollama URL
 {{- end }}
 
 {{/*
-PII (Presidio) sidecar URL
+PII (Presidio) service URL
 */}}
 {{- define "sim.piiUrl" -}}
 {{- if .Values.pii.enabled }}
diff --git a/helm/sim/templates/deployment-pii.yaml b/helm/sim/templates/deployment-pii.yaml
@@ -1,6 +1,6 @@
 {{- if .Values.pii.enabled }}
 ---
-# Deployment for the Presidio PII redaction sidecar (analyzer + anonymizer combined)
+# Deployment for the Presidio PII redaction service (analyzer + anonymizer combined)
 apiVersion: apps/v1
 kind: Deployment
 metadata:
diff --git a/helm/sim/templates/networkpolicy.yaml b/helm/sim/templates/networkpolicy.yaml
@@ -81,7 +81,7 @@ spec:
     - protocol: TCP
       port: {{ .Values.ollama.service.targetPort }}
   {{- end }}
-  # Allow egress to the PII (Presidio) sidecar
+  # Allow egress to the PII (Presidio) service
   {{- if .Values.pii.enabled }}
   - to:
     - podSelector:
@@ -316,7 +316,7 @@ spec:
 
 {{- if .Values.pii.enabled }}
 ---
-# Network Policy for the PII (Presidio) sidecar
+# Network Policy for the PII (Presidio) service
 apiVersion: networking.k8s.io/v1
 kind: NetworkPolicy
 metadata:
diff --git a/helm/sim/templates/services.yaml b/helm/sim/templates/services.yaml
@@ -103,7 +103,7 @@ spec:
 
 {{- if .Values.pii.enabled }}
 ---
-# Service for the Presidio PII redaction sidecar
+# Service fronting the Presidio PII redaction deployment
 apiVersion: v1
 kind: Service
 metadata:
diff --git a/helm/sim/tests/pii_test.yaml b/helm/sim/tests/pii_test.yaml
@@ -1,4 +1,4 @@
-suite: pii — optional Presidio sidecar + PII_URL wiring
+suite: pii — optional Presidio service + PII_URL wiring
 release:
   name: t
   namespace: sim
@@ -42,7 +42,7 @@ tests:
             name: PII_URL
             value: "http://t-sim-pii:5001"
 
-  - it: app pod gets the localhost PII_URL fallback when sidecar disabled
+  - it: app pod gets the localhost PII_URL fallback when service disabled
     template: deployment-app.yaml
     asserts:
       - contains:
@@ -72,7 +72,7 @@ tests:
             name: PII_URL
             value: "http://evil-pii:5001"
 
-  - it: app NetworkPolicy allows egress to the PII sidecar
+  - it: app NetworkPolicy allows egress to the PII service
     template: networkpolicy.yaml
     set:
       networkPolicy.enabled: true
@@ -94,7 +94,7 @@ tests:
               - protocol: TCP
                 port: 5001
 
-  - it: renders a dedicated NetworkPolicy for the PII sidecar
+  - it: renders a dedicated NetworkPolicy for the PII service
     template: networkpolicy.yaml
     set:
       networkPolicy.enabled: true
diff --git a/helm/sim/values.schema.json b/helm/sim/values.schema.json
@@ -709,12 +709,12 @@
       "properties": {
         "enabled": {
           "type": "boolean",
-          "description": "Enable the Presidio PII redaction sidecar"
+          "description": "Enable the Presidio PII redaction service"
         },
         "replicaCount": {
           "type": "integer",
           "minimum": 1,
-          "description": "Number of PII sidecar replicas"
+          "description": "Number of PII service replicas"
         },
         "image": {
           "type": "object",
diff --git a/helm/sim/values.yaml b/helm/sim/values.yaml