159 changes: 159 additions & 0 deletions .github/scripts/check-frontmatter.js
@@ -0,0 +1,159 @@
const fs = require('fs')
const matter = require('gray-matter')

module.exports = async ({ github, context, core }) => {
  const { owner, repo } = context.repo
  const pull_number = context.payload.pull_request.number

  // Get the list of changed files in the PR. Paginate so that PRs touching
  // more than one page of files (30 by default) are fully covered.
  const files = await github.paginate(github.rest.pulls.listFiles, {
    owner,
    repo,
    pull_number,
  })

  // Filter for markdown files that were added or modified. Renamed/moved
  // files have status 'renamed', so they are excluded automatically.
  const changedMdFiles = files.filter(
    (file) =>
      file.filename.endsWith('.md') &&
      (file.status === 'added' || file.status === 'modified')
  )

  if (changedMdFiles.length === 0) {
    core.info('No markdown files to check')
    return
  }

  // Accept dates from one month ago up to today
  const today = new Date()
  const oneMonthAgo = new Date()
  oneMonthAgo.setMonth(oneMonthAgo.getMonth() - 1)

  const fileResults = []

  for (const file of changedMdFiles) {
    const filePath = file.filename

    try {
      // Read the file content
      const content = fs.readFileSync(filePath, 'utf8')

      // Parse frontmatter
      const { data: frontmatter } = matter(content)

      const checks = {
        path: filePath,
        dateOk: false,
        descriptionOk: false,
        dateIssue: null,
        descriptionIssue: null,
      }

      // Check date field
      if (!frontmatter.date) {
        checks.dateIssue = 'Missing'
      } else {
        // YAML parses unquoted dates as Date objects, so normalize to a
        // YYYY-MM-DD string before validating the format
        const dateString =
          frontmatter.date instanceof Date
            ? frontmatter.date.toISOString().slice(0, 10)
            : String(frontmatter.date)

        if (!/^\d{4}-\d{2}-\d{2}$/.test(dateString)) {
          checks.dateIssue = 'Invalid format'
        } else {
          const fileDate = new Date(dateString)
          if (fileDate < oneMonthAgo || fileDate > today) {
            checks.dateIssue = 'Out of date'
          } else {
            checks.dateOk = true
          }
        }
      }

      // Check description field
      if (!frontmatter.description) {
        checks.descriptionIssue = 'Missing'
      } else if (
        typeof frontmatter.description === 'string' &&
        frontmatter.description.trim() === ''
      ) {
        checks.descriptionIssue = 'Empty'
      } else {
        checks.descriptionOk = true
      }

      fileResults.push(checks)
    } catch (error) {
      core.warning(`Error processing ${filePath}: ${error.message}`)
    }
  }

  // Build comment
  const filesWithIssues = fileResults.filter(
    (r) => !r.dateOk || !r.descriptionOk
  )

  if (filesWithIssues.length > 0) {
    const fileWord = filesWithIssues.length === 1 ? 'file' : 'files'
    const theseWord = filesWithIssues.length === 1 ? 'this' : 'these'
    const countText =
      filesWithIssues.length === 1 ? '' : `${filesWithIssues.length} `

    let commentBody = `## Metadata problems\n\nPage descriptions and accurate modified dates are important for SEO. Found issues in the following ${countText}${fileWord}:\n\n`

    if (filesWithIssues.length > 5) {
      // Too many files for a detailed breakdown: list the paths only
      for (const result of filesWithIssues) {
        commentBody += `${result.path}\n`
      }
    } else {
      for (const result of filesWithIssues) {
        const dateStatus = result.dateOk ? '✅' : '❌'
        const descStatus = result.descriptionOk ? '✅' : '❌'

        commentBody += `**${result.path}**\n`
        commentBody += `date ${dateStatus} description ${descStatus} `

        // Build a helpful message from the specific issues found
        const fixes = []
        if (
          result.dateIssue === 'Out of date' ||
          result.dateIssue === 'Missing'
        ) {
          fixes.push('specify a date within the last month')
        } else if (result.dateIssue === 'Invalid format') {
          fixes.push('use YYYY-MM-DD format for date')
        }

        if (
          result.descriptionIssue === 'Missing' ||
          result.descriptionIssue === 'Empty'
        ) {
          fixes.push('add a page description')
        }

        if (fixes.length > 0) {
          // Capitalize the first fix, join the rest with semicolons, and
          // wrap the whole hint in italics
          commentBody += '_' + fixes[0].charAt(0).toUpperCase() + fixes[0].slice(1)
          if (fixes.length > 1) {
            commentBody += '; ' + fixes.slice(1).join('; ')
          }
          commentBody += '._\n'
        }
        commentBody += '\n'
      }
    }
    commentBody += `\nPlease update the frontmatter of ${theseWord} ${fileWord}.\n\n`

    await github.rest.issues.createComment({
      owner,
      repo,
      issue_number: pull_number,
      body: commentBody,
    })

    core.warning(
      `Found frontmatter issues in ${filesWithIssues.length} file(s)`
    )
  } else {
    core.info('All documentation frontmatter is up to date!')
  }
}
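For reference, a minimal sketch of a page that would pass both checks, parsed with gray-matter the same way the script does. The page itself is hypothetical:

```js
const matter = require('gray-matter')

// Hypothetical page: a quoted YYYY-MM-DD date within the last month and a
// non-empty description are what the check above looks for.
const page = [
  '---',
  'title: "Example page"',
  'description: "A short summary of the page for SEO."',
  'date: "2025-10-05"',
  '---',
  '',
  'Page body goes here.',
].join('\n')

const { data } = matter(page)
console.log(data.date) // "2025-10-05" — a string, because the YAML value is quoted
console.log(data.description) // "A short summary of the page for SEO."
```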
8 changes: 8 additions & 0 deletions .github/scripts/package.json
@@ -0,0 +1,8 @@
{
  "name": "github-scripts",
  "version": "1.0.0",
  "description": "GitHub Actions scripts",
  "dependencies": {
    "gray-matter": "^4.0.3"
  }
}
35 changes: 35 additions & 0 deletions .github/workflows/check-frontmatter.yml
@@ -0,0 +1,35 @@
name: Metadata checks

on:
  pull_request:
    paths:
      - '**.md'

jobs:
  check-frontmatter:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install dependencies
        run: npm install gray-matter
        working-directory: .github/scripts

      - name: Check page metadata
        uses: actions/github-script@v7
        with:
          script: |
            const script = require('./.github/scripts/check-frontmatter.js');
            await script({github, context, core});
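Since the module only takes `{ github, context, core }`, it can also be dry-run outside Actions with stubbed arguments. A rough sketch, assuming it is run from the repo root with gray-matter installed — the stubs below are hypothetical, not part of this PR:

```js
const check = require('./.github/scripts/check-frontmatter.js')

check({
  github: {
    // Return one fake changed file; the script then reads it from disk,
    // relative to the current working directory.
    paginate: async () => [
      { filename: 'docs/events/timestamps/index.md', status: 'modified' },
    ],
    rest: {
      pulls: { listFiles: async () => ({ data: [] }) },
      issues: { createComment: async ({ body }) => console.log(body) },
    },
  },
  context: {
    repo: { owner: 'example', repo: 'docs' },
    payload: { pull_request: { number: 1 } },
  },
  core: { info: console.log, warning: console.warn },
})
```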
4 changes: 2 additions & 2 deletions docs/events/cookie-extension/index.md
@@ -1,11 +1,11 @@
 ---
 title: "Cookie Extension service"
 description: "How to create and use a Cookie Extension service, previously known as ID service, to mitigate against ITP"
-date: "2020-02-26"
+date: "2025-10-04"
 sidebar_position: 3.5
 ---
 
-A Cookie Extension service works with the Snowplow [browser tracker](/docs/sources/trackers/web-trackers/index.md) to help you maintain consistent user identification despite browser privacy restrictions like Safari's Intelligent Tracking Prevention.
+A Cookie Extension service works with the Snowplow [JavaScript tracker](/docs/sources/trackers/web-trackers/index.md) to help you maintain consistent user identification despite browser privacy restrictions like Safari's Intelligent Tracking Prevention.
 
 Deployed within your website's IP space, the service creates and manages persistent browser identifiers.
14 changes: 8 additions & 6 deletions docs/events/going-deeper/http-headers/index.md
@@ -1,16 +1,18 @@
 ---
 title: "HTTP request headers"
+date: "2025-10-05"
 description: "Guide to the customizable HTTP headers that can be added to Snowplow event requests."
 ---
 
 Snowplow Collectors will collect any standard HTTP headers and the values of these headers can be extracted during Enrichment. The [HTTP header extractor enrichment](/docs/pipeline/enrichments/available-enrichments/http-header-extractor-enrichment/index.md) can be configured for the headers you wish to extract.
 
 Additionally, the following two headers can be sent on requests:
 
-| Header | Allowed Values | Description |
-|-------------------|--------------------|------------|
-| Content-Type | `application/json` | See [MDN Content-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type) |
-| SP-Anonymous | `*` | Enables Server Side Anonymization, preventing the User IP Address and Network User ID from being collected |
+| Header       | Allowed Values     | Description |
+| ------------ | ------------------ | ----------- |
+| Content-Type | `application/json` | See [MDN Content-Type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type) |
+| SP-Anonymous | `*`                | Enables Server Side Anonymization, preventing the User IP Address and Network User ID from being collected |
 
-#### Cookie Header
+## Cookie Header
 
-Snowplow Collectors will collect any cookie information sent in the `Cookie` HTTP header. Cookies can be attached to events using the [Cookie extractor enrichment](/docs/pipeline/enrichments/available-enrichments/cookie-extractor-enrichment/index.md)
+Snowplow Collectors will collect any cookie information sent in the `Cookie` HTTP header. Cookies can be attached to events using the [Cookie extractor enrichment](/docs/pipeline/enrichments/available-enrichments/cookie-extractor-enrichment/index.md)
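An aside on the table in this file: a sketch of what sending a payload with both headers might look like from Node. The collector host is a placeholder and the payload schema version is an assumption — substitute your own pipeline's values:

```js
// Hypothetical collector endpoint; adjust the host to your deployment.
// tp2 is the standard POST path for Snowplow event payloads.
const collectorUrl = 'https://collector.example.com/com.snowplowanalytics.snowplow/tp2'

// Top-level await: run as an ES module or wrap in an async function.
const response = await fetch(collectorUrl, {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'SP-Anonymous': '*', // ask the Collector not to record user IP or network user ID
  },
  body: JSON.stringify({
    // Schema version is an assumption — use the one your trackers send.
    schema: 'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
    data: [{ e: 'pv', url: 'https://example.com/', p: 'web', tv: 'manual-0.0.1' }],
  }),
})
console.log(response.status)
```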
22 changes: 11 additions & 11 deletions docs/events/timestamps/index.md
@@ -1,23 +1,23 @@
 ---
 title: "Timestamps"
-description: ""
-date: "2025-05-15"
+description: "Explains the different timestamps used in Snowplow events, and how the derived timestamp is calculated."
+date: "2025-10-05"
 sidebar_position: 4
 ---
 
 Snowplow events have multiple timestamps that are added as the payload moves through the pipeline. The set of timestamps is designed to account for devices with incorrectly set clocks, or delays in event sending due to network outages. All timestamps are converted to UTC for consistency across events.
 
 The timestamps are:
 
-| Timestamp | Added by | Description | In all events |
-| --------------------- | -------- | ----------- | ------------- |
-| `dvce_created_tstamp` | [Tracker](/docs/sources/trackers/index.md) | The device's timestamp when the event was created - not necessarily the correct time | ✅ |
-| `dvce_sent_tstamp` | [Tracker](/docs/sources/trackers/index.md) | The device's timestamp when the event was successfully sent to the Collector endpoint - not necessarily the correct time | ✅ |
-| `true_tstamp` | You | Exact timestamp, defined within your tracking code | ❌ |
-| `collector_tstamp` | [Collector](/docs/pipeline/collector/index.md) | When the Collector received the event payload | ✅ |
-| `derived_tstamp` | [Enrich](/docs/api-reference/enrichment-components/index.md) | Calculated from other timestamps, or the same as `true_tstamp` | ✅ |
-| `etl_tstamp` | [Enrich](/docs/api-reference/enrichment-components/index.md) | The time at which Enrich started processing the event | ✅ |
-| `refr_device_tstamp` | [Enrich](/docs/api-reference/enrichment-components/index.md) | Timestamp extracted from the [cross-domain navigation query string](/docs/pipeline/enrichments/available-enrichments/cross-navigation-enrichment/index.md), if present | ❌ |
+| Timestamp | Added by | Description | In all events |
+| --------------------- | -------- | ----------- | ------------- |
+| `dvce_created_tstamp` | [Tracker](/docs/sources/trackers/index.md) | The device's timestamp when the event was created - not necessarily the correct time | ✅ |
+| `dvce_sent_tstamp` | [Tracker](/docs/sources/trackers/index.md) | The device's timestamp when the event was successfully sent to the Collector endpoint - not necessarily the correct time | ✅ |
+| `true_tstamp` | You | Exact timestamp, defined within your tracking code | ❌ |
+| `collector_tstamp` | [Collector](/docs/pipeline/collector/index.md) | When the Collector received the event payload | ✅ |
+| `derived_tstamp` | [Enrich](/docs/api-reference/enrichment-components/index.md) | Calculated from other timestamps, or the same as `true_tstamp` | ✅ |
+| `etl_tstamp` | [Enrich](/docs/api-reference/enrichment-components/index.md) | The time at which Enrich started processing the event | ✅ |
+| `refr_device_tstamp` | [Enrich](/docs/api-reference/enrichment-components/index.md) | Timestamp extracted from the [cross-domain navigation query string](/docs/pipeline/enrichments/available-enrichments/cross-navigation-enrichment/index.md), if present | ❌ |
+| `load_tstamp` | [Loader](/docs/destinations/warehouses-lakes/index.md) or warehouse | Timestamp when the event was loaded into the warehouse | ✅ |
 
 The `load_tstamp` is added either by the Loader or by the warehouse at the point of loading, depending on the Loader/warehouse. Use this timestamp for incremental processing.
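On the new description's mention of how the derived timestamp is calculated — a sketch of the usual heuristic, paraphrased rather than taken from this PR:

```js
// When true_tstamp is absent, take the Collector's trusted clock and subtract
// the time the event spent on the device between creation and sending. Any
// device clock skew cancels out, because the created and sent timestamps
// come from the same (possibly wrong) device clock.
function deriveTstamp({ trueTstamp, collectorTstamp, dvceCreatedTstamp, dvceSentTstamp }) {
  if (trueTstamp) return trueTstamp
  const sendDelayMs = dvceSentTstamp.getTime() - dvceCreatedTstamp.getTime()
  return new Date(collectorTstamp.getTime() - sendDelayMs)
}
```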