fix: US state targeting SDK configuration (#2498) #8422
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Test | |
| on: | |
| push: | |
| branches: [ master, renovate/** ] | |
| pull_request: | |
| jobs: | |
| build: | |
| name: Docs build | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout Source code | |
| uses: actions/checkout@v6 | |
| - name: Use Node.js 24 | |
| uses: actions/setup-node@v6 | |
| with: | |
| node-version: 24 | |
| - uses: apify/workflows/pnpm-install@main | |
| - run: pnpm build | |
| env: | |
| INTERCOM_APP_ID: ${{ secrets.INTERCOM_APP_ID }} | |
| SEGMENT_TOKEN: ${{ secrets.SEGMENT_TOKEN }} | |
| - name: Check llms.txt size | |
| run: npm run test:llms-size | |
| - name: Install Nginx | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y nginx | |
| - name: Start Docusaurus server | |
| run: | | |
| nohup pnpm exec docusaurus serve --port 3000 --no-open & | |
| sleep 5 | |
| curl -f http://localhost:3000 > /dev/null | |
| - name: Start Nginx with project config | |
| run: | | |
| PWD_PATH="$(pwd)" | |
| cp nginx.conf nginx-test.conf | |
| sed -i 's|https://apify.github.io/apify-docs|http://127.0.0.1:3000|g' nginx-test.conf | |
| cat > default.conf <<EOF | |
| worker_processes auto; | |
| error_log ${PWD_PATH}/logs/error.log; | |
| pid ${PWD_PATH}/logs/nginx.pid; | |
| events {} | |
| http { | |
| access_log ${PWD_PATH}/logs/access.log; | |
| include ${PWD_PATH}/nginx-test.conf; | |
| } | |
| EOF | |
| mkdir -p "${PWD_PATH}/logs" | |
| nginx -c "${PWD_PATH}/default.conf" | |
| sleep 1 | |
| - name: Run header assertions | |
| run: | | |
| set -euo pipefail | |
| function assert_header() { | |
| url=$1 | |
| header=$2 | |
| expected=$3 | |
| shift 3 | |
| extra_args=("$@") | |
| actual=$(curl -s -D - -o /dev/null "${extra_args[@]}" "$url" | grep -i "^$header" | tr -d '\r' || true) | |
| echo "→ $url → $actual" | |
| echo "$actual" | grep -q "$expected" || (echo "❌ Expected '$expected' in '$header' for $url" && exit 1) | |
| } | |
| function assert_status() { | |
| url=$1 | |
| expected=$2 | |
| shift 2 | |
| extra_args=("$@") | |
| actual=$(curl -s -o /dev/null -w "%{http_code}" "${extra_args[@]}" "$url") | |
| echo "→ $url → HTTP $actual" | |
| [ "$actual" = "$expected" ] || (echo "❌ Expected HTTP $expected but got $actual for $url" && exit 1) | |
| } | |
| function assert_no_redirect() { | |
| url=$1 | |
| shift | |
| extra_args=("$@") | |
| response=$(curl -s -D - -o /dev/null -w "\n%{http_code}" "${extra_args[@]}" "$url" 2>/dev/null) | |
| status=$(echo "$response" | tail -1) | |
| location=$(echo "$response" | grep -i "^location:" | tr -d '\r' || true) | |
| echo "→ $url → HTTP $status ${location:+(${location})}" | |
| if [ "$status" = "301" ] || [ "$status" = "302" ]; then | |
| echo "❌ Got redirect for $url: $location" && exit 1 | |
| fi | |
| } | |
| # Like assert_header but follows redirects and asserts on the | |
| # final response's Content-Type. Use for URLs that legitimately | |
| # redirect before serving the content (e.g. child repo homepages). | |
| # Only Content-Type is supported because curl's writeout variable | |
| # %{content_type} is the only header it exposes after -L. | |
| function assert_final_content_type() { | |
| url=$1 | |
| expected=$2 | |
| shift 2 | |
| extra_args=("$@") | |
| actual=$(curl -s -L -o /dev/null -w "%{content_type}" "${extra_args[@]}" "$url") | |
| echo "→ $url → final Content-Type: $actual" | |
| echo "$actual" | grep -q "$expected" || (echo "❌ Expected '$expected' in final Content-Type for $url, got '$actual'" && exit 1) | |
| } | |
| # Asserts that the HTML at $url contains a | |
| # <link rel="alternate" type="text/markdown" href="$expected_href"> | |
| # tag (attribute order/quoting may vary; we only require | |
| # type=text/markdown and the expected href on the same | |
| # <link> element). AI crawlers rely on this tag to find | |
| # the .md counterpart of an HTML page. | |
| function assert_html_md_alternate() { | |
| url=$1 | |
| expected_href=$2 | |
| matched=$(curl -s "$url" | grep -oE '<link [^>]*type="?text/markdown[^>]*>' | grep -F "$expected_href" || true) | |
| echo "→ $url → ${matched:-no match}" | |
| if [ -z "$matched" ]; then | |
| echo "❌ Expected <link rel=\"alternate\" type=\"text/markdown\" href=\"$expected_href\"> in HTML for $url" | |
| exit 1 | |
| fi | |
| } | |
| echo "🧪 Checking open redirect protection..." | |
| # Backslash URLs must not produce redirects (the redirect Location | |
| # would contain \, which browsers normalize to /, creating | |
| # protocol-relative URLs like //evil.com that redirect externally). | |
| assert_no_redirect "http://localhost:8080///%5Cevil.com/" | |
| assert_no_redirect "http://localhost:8080/%5Cevil.com/" | |
| assert_no_redirect "http://localhost:8080///%5cevil.com/" | |
| # Literal backslash (simulates CDN pre-decoding %5C before forwarding) | |
| assert_no_redirect "http://localhost:8080" --request-target '/\evil.com/' | |
| assert_no_redirect "http://localhost:8080" --request-target '///\evil.com/' | |
| # Normal trailing-slash redirect must still work | |
| assert_status "http://localhost:8080/platform/proxy/usage/" "302" | |
| echo "🧪 Checking Nginx responses... (apify-docs)" | |
| assert_header "http://localhost:8080/" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/platform/proxy/usage" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/platform/proxy/usage.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/platform/proxy/usage" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/img/docs-og.png" "Content-Type" "image/png" | |
| assert_header "http://localhost:8080/img/javascript-40x40.svg" "Content-Type" "image/svg" | |
| assert_header "http://localhost:8080/llms.txt" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/llms-full.txt" "Content-Type" "text/markdown" | |
| echo "🧪 Checking .md counterparts of pages that are excluded from llms.txt..." | |
| # Pages excluded from the llms.txt index (see scripts/joinLlmsFiles.mjs) | |
| # must still serve their .md markdown counterparts. Regression test for | |
| # https://github.com/apify/apify-docs/pull/2470#discussion_r3161627392 | |
| assert_header "http://localhost:8080/sdk.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/open-source.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/api/v2/actor-builds-get.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/api/v2/dataset-get.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/academy/tutorials.md" "Content-Type" "text/markdown" | |
| echo "🧪 Checking HTML alternate links to .md counterparts..." | |
| # The HTML version of every page (including those excluded from the | |
| # llms.txt index) must advertise its .md counterpart via | |
| # <link rel="alternate" type="text/markdown" href="..."> so that | |
| # AI crawlers can discover the markdown version. | |
| assert_html_md_alternate "http://localhost:8080/sdk" "https://docs.apify.com/sdk.md" | |
| assert_html_md_alternate "http://localhost:8080/open-source" "https://docs.apify.com/open-source.md" | |
| assert_html_md_alternate "http://localhost:8080/api/v2/actor-builds-get" "https://docs.apify.com/api/v2/actor-builds-get.md" | |
| assert_html_md_alternate "http://localhost:8080/api/v2/dataset-get" "https://docs.apify.com/api/v2/dataset-get.md" | |
| assert_html_md_alternate "http://localhost:8080/academy/tutorials" "https://docs.apify.com/academy/tutorials.md" | |
| # Sanity check: a regular (non-excluded) page also has the alternate. | |
| assert_html_md_alternate "http://localhost:8080/platform/proxy/usage" "https://docs.apify.com/platform/proxy/usage.md" | |
| echo "🧪 Checking Nginx responses... (apify-sdk-js)" | |
| assert_final_content_type "http://localhost:8080/sdk/js" "text/html" | |
| assert_final_content_type "http://localhost:8080/sdk/js" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/sdk/js/docs/introduction/quick-start" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/sdk/js/llms.txt" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/sdk/js/llms-full.txt" "Content-Type" "text/markdown" | |
| echo "🧪 Checking Nginx responses... (apify-sdk-python)" | |
| assert_final_content_type "http://localhost:8080/sdk/python" "text/html" | |
| assert_final_content_type "http://localhost:8080/sdk/python" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/sdk/python/docs/changelog" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/sdk/python/docs/changelog.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/sdk/python/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/sdk/python/llms.txt" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/sdk/python/llms-full.txt" "Content-Type" "text/markdown" | |
| echo "🧪 Checking Nginx responses... (apify-client-js)" | |
| assert_final_content_type "http://localhost:8080/api/client/js" "text/html" | |
| assert_final_content_type "http://localhost:8080/api/client/js" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/api/client/js/docs/changelog" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/api/client/js/docs/changelog.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/api/client/js/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/api/client/js/llms.txt" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/api/client/js/llms-full.txt" "Content-Type" "text/markdown" | |
| echo "🧪 Checking Nginx responses... (apify-client-python)" | |
| assert_final_content_type "http://localhost:8080/api/client/python" "text/html" | |
| assert_final_content_type "http://localhost:8080/api/client/python" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/api/client/python/docs/changelog" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/api/client/python/docs/changelog.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/api/client/python/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/api/client/python/llms.txt" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/api/client/python/llms-full.txt" "Content-Type" "text/markdown" | |
| echo "🧪 Checking Nginx responses... (apify-cli)" | |
| assert_final_content_type "http://localhost:8080/cli" "text/html" | |
| assert_final_content_type "http://localhost:8080/cli" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/cli/docs/changelog" "Content-Type" "text/html" | |
| assert_header "http://localhost:8080/cli/docs/changelog.md" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/cli/docs/changelog" "Content-Type" "text/markdown" -H "Accept: text/markdown" | |
| assert_header "http://localhost:8080/cli/llms.txt" "Content-Type" "text/markdown" | |
| assert_header "http://localhost:8080/cli/llms-full.txt" "Content-Type" "text/markdown" | |
| echo "✅ All Nginx header checks passed." | |
| - name: Stop Nginx | |
| if: always() | |
| run: nginx -c "$(pwd)/default.conf" -s stop | |
| lint_content: | |
| name: Lint markdown content | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout Source code | |
| uses: actions/checkout@v6 | |
| - name: Get changed files | |
| id: changed-files | |
| uses: tj-actions/changed-files@v47 | |
| with: | |
| files: '**/*.{md,mdx}' | |
| files_ignore: '!sources/api/*.{md,mdx}' | |
| separator: "," | |
| - name: Use Node.js 24 | |
| uses: actions/setup-node@v6 | |
| with: | |
| node-version: 24 | |
| - uses: apify/workflows/pnpm-install@main | |
| - name: List and Lint Changed Markdown Files | |
| env: | |
| ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} | |
| run: | | |
| IFS=',' read -ra FILE_ARRAY <<< "$ALL_CHANGED_FILES" | |
| for file in "${FILE_ARRAY[@]}"; do | |
| pnpm exec markdownlint "$file" | |
| done | |
| lint_code: | |
| name: Lint app code | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Checkout Source code | |
| uses: actions/checkout@v6 | |
| - name: Use Node.js 24 | |
| uses: actions/setup-node@v6 | |
| with: | |
| node-version: 24 | |
| - uses: apify/workflows/pnpm-install@main | |
| - run: pnpm lint:code | |
| - name: Format check | |
| run: pnpm format:check |