Skip to content

fix: US state targeting SDK configuration #8420

fix: US state targeting SDK configuration

fix: US state targeting SDK configuration #8420

Workflow file for this run

name: Test

# Runs on pushes to master and to Renovate branches, and on every pull request.
on:
  push:
    branches:
      - master
      - 'renovate/**'
  pull_request:
jobs:
  # Builds the docs site, serves the build through the project's Nginx config
  # (with the upstream rewritten to the local Docusaurus server), and asserts
  # on the HTTP responses that Nginx produces.
  build:
    name: Docs build
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source code
        uses: actions/checkout@v6

      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          node-version: 24

      - uses: apify/workflows/pnpm-install@main

      - run: pnpm build
        env:
          INTERCOM_APP_ID: ${{ secrets.INTERCOM_APP_ID }}
          SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }}

      - name: Check llms.txt size
        # Use pnpm like every other step in this workflow.
        run: pnpm run test:llms-size

      - name: Install Nginx
        run: |
          sudo apt-get update
          sudo apt-get install -y nginx

      - name: Start Docusaurus server
        run: |
          nohup pnpm exec docusaurus serve --port 3000 --no-open &
          # Poll until the server answers instead of relying on a fixed sleep:
          # retry (including on "connection refused") once per second for up
          # to 30 attempts, and fail the step if it never comes up.
          curl --fail --silent --show-error --output /dev/null \
            --retry 30 --retry-delay 1 --retry-connrefused \
            http://localhost:3000

      - name: Start Nginx with project config
        run: |
          PWD_PATH="$(pwd)"
          # Test against a copy of the project config whose upstream points at
          # the local Docusaurus server instead of GitHub Pages.
          cp nginx.conf nginx-test.conf
          sed -i 's|https://apify.github.io/apify-docs|http://127.0.0.1:3000|g' nginx-test.conf
          # Minimal top-level config that keeps logs and the pid file inside
          # the workspace and includes the rewritten project config.
          cat > default.conf <<EOF
          worker_processes auto;
          error_log ${PWD_PATH}/logs/error.log;
          pid ${PWD_PATH}/logs/nginx.pid;
          events {}
          http {
          access_log ${PWD_PATH}/logs/access.log;
          include ${PWD_PATH}/nginx-test.conf;
          }
          EOF
          mkdir -p "${PWD_PATH}/logs"
          nginx -c "${PWD_PATH}/default.conf"
          sleep 1

      - name: Run header assertions
        run: |
          set -euo pipefail

          # assert_header URL HEADER EXPECTED [curl args...]
          # Fails unless the response's HEADER line matches EXPECTED.
          function assert_header() {
            local url=$1 header=$2 expected=$3
            shift 3
            local extra_args=("$@")
            local actual
            # Anchor on "HEADER:" so the name cannot prefix-match another header.
            actual=$(curl -s -D - -o /dev/null "${extra_args[@]}" "$url" | grep -i "^$header:" | tr -d '\r' || true)
            echo "→ $url → $actual"
            grep -q "$expected" <<< "$actual" || { echo "❌ Expected '$expected' in '$header' for $url"; exit 1; }
          }

          # assert_status URL EXPECTED_CODE [curl args...]
          # Fails unless the response status code equals EXPECTED_CODE.
          function assert_status() {
            local url=$1 expected=$2
            shift 2
            local extra_args=("$@")
            local actual
            actual=$(curl -s -o /dev/null -w "%{http_code}" "${extra_args[@]}" "$url")
            echo "→ $url → HTTP $actual"
            [ "$actual" = "$expected" ] || { echo "❌ Expected HTTP $expected but got $actual for $url"; exit 1; }
          }

          # assert_no_redirect URL [curl args...]
          # Fails when the response is a redirect. Every redirect status that
          # carries a Location header is rejected, not just 301/302.
          function assert_no_redirect() {
            local url=$1
            shift
            local extra_args=("$@")
            local response status location
            response=$(curl -s -D - -o /dev/null -w "\n%{http_code}" "${extra_args[@]}" "$url" 2>/dev/null)
            # The write-out format puts the status code on the last line.
            status=$(tail -n 1 <<< "$response")
            location=$(grep -i "^location:" <<< "$response" | tr -d '\r' || true)
            echo "→ $url → HTTP $status ${location:+(${location})}"
            case "$status" in
              301|302|303|307|308)
                echo "❌ Got redirect for $url: $location"
                exit 1
                ;;
            esac
          }

          # Like assert_header but follows redirects and asserts on the
          # final response's Content-Type. Use for URLs that legitimately
          # redirect before serving the content (e.g. child repo homepages).
          # Only Content-Type is supported because curl's writeout variable
          # %{content_type} is the only header it exposes after -L.
          function assert_final_content_type() {
            local url=$1 expected=$2
            shift 2
            local extra_args=("$@")
            local actual
            actual=$(curl -s -L -o /dev/null -w "%{content_type}" "${extra_args[@]}" "$url")
            echo "→ $url → final Content-Type: $actual"
            grep -q "$expected" <<< "$actual" || { echo "❌ Expected '$expected' in final Content-Type for $url, got '$actual'"; exit 1; }
          }

          # Asserts that the HTML at $url contains a
          # <link rel="alternate" type="text/markdown" href="$expected_href">
          # tag (attribute order/quoting may vary; we only require
          # type=text/markdown and the expected href on the same
          # <link> element). AI crawlers rely on this tag to find
          # the .md counterpart of an HTML page.
          function assert_html_md_alternate() {
            local url=$1 expected_href=$2
            local matched
            matched=$(curl -s "$url" | grep -oE '<link [^>]*type="?text/markdown[^>]*>' | grep -F "$expected_href" || true)
            echo "→ $url → ${matched:-no match}"
            if [ -z "$matched" ]; then
              echo "❌ Expected <link rel=\"alternate\" type=\"text/markdown\" href=\"$expected_href\"> in HTML for $url"
              exit 1
            fi
          }

          echo "🧪 Checking open redirect protection..."
          # Backslash URLs must not produce redirects (the redirect Location
          # would contain \, which browsers normalize to /, creating
          # protocol-relative URLs like //evil.com that redirect externally).
          assert_no_redirect "http://localhost:8080///%5Cevil.com/"
          assert_no_redirect "http://localhost:8080/%5Cevil.com/"
          assert_no_redirect "http://localhost:8080///%5cevil.com/"
          # Literal backslash (simulates CDN pre-decoding %5C before forwarding)
          assert_no_redirect "http://localhost:8080" --request-target '/\evil.com/'
          assert_no_redirect "http://localhost:8080" --request-target '///\evil.com/'
          # Normal trailing-slash redirect must still work
          assert_status "http://localhost:8080/platform/proxy/usage/" "302"

          echo "🧪 Checking Nginx responses... (apify-docs)"
          assert_header "http://localhost:8080/" "Content-Type" "text/html"
          assert_header "http://localhost:8080/" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/platform/proxy/usage" "Content-Type" "text/html"
          assert_header "http://localhost:8080/platform/proxy/usage.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/platform/proxy/usage" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/img/docs-og.png" "Content-Type" "image/png"
          assert_header "http://localhost:8080/img/javascript-40x40.svg" "Content-Type" "image/svg"
          assert_header "http://localhost:8080/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking .md counterparts of pages that are excluded from llms.txt..."
          # Pages excluded from the llms.txt index (see scripts/joinLlmsFiles.mjs)
          # must still serve their .md markdown counterparts. Regression test for
          # https://github.com/apify/apify-docs/pull/2470#discussion_r3161627392
          assert_header "http://localhost:8080/sdk.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/open-source.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/v2/actor-builds-get.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/v2/dataset-get.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/academy/tutorials.md" "Content-Type" "text/markdown"

          echo "🧪 Checking HTML alternate links to .md counterparts..."
          # The HTML version of every page (including those excluded from the
          # llms.txt index) must advertise its .md counterpart via
          # <link rel="alternate" type="text/markdown" href="..."> so that
          # AI crawlers can discover the markdown version.
          assert_html_md_alternate "http://localhost:8080/sdk" "https://docs.apify.com/sdk.md"
          assert_html_md_alternate "http://localhost:8080/open-source" "https://docs.apify.com/open-source.md"
          assert_html_md_alternate "http://localhost:8080/api/v2/actor-builds-get" "https://docs.apify.com/api/v2/actor-builds-get.md"
          assert_html_md_alternate "http://localhost:8080/api/v2/dataset-get" "https://docs.apify.com/api/v2/dataset-get.md"
          assert_html_md_alternate "http://localhost:8080/academy/tutorials" "https://docs.apify.com/academy/tutorials.md"
          # Sanity check: a regular (non-excluded) page also has the alternate.
          assert_html_md_alternate "http://localhost:8080/platform/proxy/usage" "https://docs.apify.com/platform/proxy/usage.md"

          echo "🧪 Checking Nginx responses... (apify-sdk-js)"
          assert_final_content_type "http://localhost:8080/sdk/js" "text/html"
          assert_final_content_type "http://localhost:8080/sdk/js" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start" "Content-Type" "text/html"
          assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/sdk/js/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/js/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-sdk-python)"
          assert_final_content_type "http://localhost:8080/sdk/python" "text/html"
          assert_final_content_type "http://localhost:8080/sdk/python" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/sdk/python/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/sdk/python/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/python/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/sdk/python/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/sdk/python/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-client-js)"
          assert_final_content_type "http://localhost:8080/api/client/js" "text/html"
          assert_final_content_type "http://localhost:8080/api/client/js" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/api/client/js/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/api/client/js/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/js/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/api/client/js/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/js/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-client-python)"
          assert_final_content_type "http://localhost:8080/api/client/python" "text/html"
          assert_final_content_type "http://localhost:8080/api/client/python" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/api/client/python/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/api/client/python/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/python/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/api/client/python/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/api/client/python/llms-full.txt" "Content-Type" "text/markdown"

          echo "🧪 Checking Nginx responses... (apify-cli)"
          assert_final_content_type "http://localhost:8080/cli" "text/html"
          assert_final_content_type "http://localhost:8080/cli" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/cli/docs/changelog" "Content-Type" "text/html"
          assert_header "http://localhost:8080/cli/docs/changelog.md" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/cli/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown"
          assert_header "http://localhost:8080/cli/llms.txt" "Content-Type" "text/markdown"
          assert_header "http://localhost:8080/cli/llms-full.txt" "Content-Type" "text/markdown"

          echo "✅ All Nginx header checks passed."

      - name: Stop Nginx
        if: always()
        run: |
          # This step also runs when an earlier step failed; only signal Nginx
          # if it got far enough to write its pid file, so cleanup does not
          # add a second, unrelated failure to the job.
          if [ -f "$(pwd)/logs/nginx.pid" ]; then
            nginx -c "$(pwd)/default.conf" -s stop
          fi

  # Runs markdownlint on the markdown files changed by this push / pull request.
  lint_content:
    name: Lint markdown content
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source code
        uses: actions/checkout@v6

      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v47
        with:
          files: '**/*.{md,mdx}'
          # NOTE(review): the leading '!' inside files_ignore looks like a
          # double negation — confirm against the tj-actions/changed-files
          # docs that this ignores sources/api as intended.
          files_ignore: '!sources/api/*.{md,mdx}'
          separator: ","

      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          node-version: 24

      - uses: apify/workflows/pnpm-install@main

      - name: List and Lint Changed Markdown Files
        env:
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          # The file list is comma-separated (see `separator` above).
          IFS=',' read -ra FILE_ARRAY <<< "$ALL_CHANGED_FILES"
          # Lint every changed file and fail at the end, so a single run
          # reports all offending files rather than only the first one.
          status=0
          for file in "${FILE_ARRAY[@]}"; do
            pnpm exec markdownlint "$file" || status=1
          done
          exit "$status"

  # Lints and format-checks the application code.
  lint_code:
    name: Lint app code
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Source code
        uses: actions/checkout@v6

      - name: Use Node.js 24
        uses: actions/setup-node@v6
        with:
          node-version: 24

      - uses: apify/workflows/pnpm-install@main

      - run: pnpm lint:code

      - name: Format check
        run: pnpm format:check