Skip to content

Commit fe7b1f1

Browse files
authored
chore: tell robots not to index non-latest manual (#647)
Use meta tag to tell robots to not index any version of the manual other than https://lean-lang.org/doc/reference/latest/ . This includes not indexing the version of the manual that is the current referent of `latest`, e.g., at time of writing, https://lean-lang.org/doc/reference/4.25.0-rc2/ would instruct crawlers not to index.
1 parent 4f6e6aa commit fe7b1f1

File tree

2 files changed

+61
-3
lines changed

2 files changed

+61
-3
lines changed

README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,3 +183,30 @@ Therefore we can be careful on both sides:
183183
- overlays should, ideally, as time goes on, only monotonically
184184
produce more data, e.g. it should only add fields to injected javascript values and avoid changing the contract of existing fields.
185185
- documents should, ideally, fail gracefully if injected data they expect to exist is missing
186+
187+
### Local Testing
188+
189+
To test `overlay.py` locally before pushing, do the following.
190+
- Ensure branches `deploy` and `postdeploy` exist locally.
191+
- You'll probably want to do
192+
```
193+
git fetch
194+
git checkout deploy
195+
git reset --hard remotes/upstream/deploy
196+
git checkout postdeploy
197+
git reset --hard remotes/upstream/postdeploy
198+
```
199+
- From the `reference-manual` checkout directory, on branch `main`, from a clean
200+
working directory (i.e. make sure to commit any changes you've made) run
201+
```shell
202+
python3 -B deploy/overlay.py . deploy postdeploy
203+
```
204+
- Inspect whatever `postdeploy` results you're interested in, e.g.
205+
```
206+
git show postdeploy:4.25.0-rc2/Type-Classes/Basic-Classes/index.html
207+
# Expect to see <meta name="robots" content="noindex">
208+
```
209+
```
210+
git show postdeploy:latest/Type-Classes/Basic-Classes/index.html
211+
# Expect to *not* see <meta name="robots" content="noindex">
212+
```

deploy/overlay.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,32 @@
44
from release_utils import run_git_command, is_git_ancestor, find_latest_version, find_latest_stable_version
55
from pathlib import Path
66

7+
def add_noindex_meta(directory, extensions=(".html", ".htm")):
8+
"""
9+
Recursively walk through `directory`, find all HTML files,
10+
and insert <meta name="robots" content="noindex"> right after <head>.
11+
"""
12+
for root, _, files in os.walk(directory):
13+
for filename in files:
14+
if filename.lower().endswith(extensions):
15+
filepath = os.path.join(root, filename)
16+
with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
17+
content = f.read()
18+
19+
# Only edit if <head> exists and we haven't already added the meta tag
20+
if "<head>" in content and 'name="robots"' not in content:
21+
new_content = content.replace(
22+
"<head>",
23+
'<head>\n<meta name="robots" content="noindex">'
24+
)
25+
26+
# Write back the modified file
27+
with open(filepath, "w", encoding="utf-8") as f:
28+
f.write(new_content)
29+
print(f"Updated: {filepath}")
30+
else:
31+
print(f"Skipped: {filepath}")
32+
733
# This function is the right thing to change to change the
834
# content of the overlays that are applied.
935
#
@@ -15,11 +41,12 @@ def apply_overlays(deploy_dir):
1541
"""
1642
latest_version = find_latest_version(deploy_dir)
1743
latest_stable_version = find_latest_stable_version(deploy_dir)
18-
print (f"LATEST VERSION {latest_version}")
19-
print (f"LATEST STABLE VERSION {latest_stable_version}")
44+
print (f"overlay.py: the latest lean version is {latest_version}")
45+
print (f"overlay.py: the latest stable lean version is {latest_stable_version}")
2046
header = "// This file was automatically generated by reference-manual/deploy/overlay.py"
2147
for inner in Path(deploy_dir).iterdir():
2248
if inner.is_dir() and (inner / "static").is_dir():
49+
# Generate some version metadata
2350
filename = inner / "static" / "metadata.js"
2451
content = f"{header}\nwindow.metadata = {{versionString: \"{inner}\"}};\n"
2552
if str(inner) == latest_version or str(inner) == "latest":
@@ -29,6 +56,10 @@ def apply_overlays(deploy_dir):
2956
with open(filename, 'a') as file:
3057
file.write(content)
3158

59+
# Tell robots not to index anything other than latest
60+
if str(inner) != "latest":
61+
add_noindex_meta(inner)
62+
3263
def deploy_overlays(deploy_dir, src_branch, tgt_branch):
3364
"""
3465
Apply desired overlays inside deploy_dir
@@ -64,7 +95,7 @@ def deploy_overlays(deploy_dir, src_branch, tgt_branch):
6495
# (see https://stackoverflow.com/questions/4911794/git-command-for-making-one-branch-like-another/4912267#4912267 for context)
6596
run_git_command(["git", "merge", "-m", "merge overlays", "--no-ff", "--no-edit", "-s", "ours", tgt_branch])
6697
run_git_command(["git", "switch", tgt_branch])
67-
run_git_command(["git", "reset", "--hard", src_branch])
98+
run_git_command(["git", "reset", "--hard", src_branch, "--"])
6899
run_git_command(["git", "switch", src_branch])
69100
# Rewind the src_branch back past the merge commit and the
70101
# overlay commit. This cleanup probably isn't strictly necessary

0 commit comments

Comments
 (0)