Skip to content

refactor: consolidate TOC file handling in cloud document scripts #21512

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 5, 2025
Merged
2 changes: 1 addition & 1 deletion scripts/concatMdByToc.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ const main = () => {
let mergedStr = "";

fileList.forEach((filePath) => {
mergedStr += `${handleSingleMd(`.${filePath}`)}\n\n`;
mergedStr += `${handleSingleMd(`./${filePath}`)}\n\n`;
});

const variables = JSON.parse(
Expand Down
6 changes: 3 additions & 3 deletions scripts/filterCloudDoc.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {
getAllMdList,
getAllCloudMdList,
copySingleFileSync,
copyFileWithCustomContentSync,
copyDirectoryWithCustomContentSync,
Expand All @@ -25,9 +25,9 @@ const extractFilefromList = (
};

const main = () => {
const filteredLinkList = getAllMdList("TOC-tidb-cloud.md");
const allFilePaths = getAllCloudMdList();

extractFilefromList(filteredLinkList, ".", "./tmp");
extractFilefromList(allFilePaths, "./", "./tmp");
copySingleFileSync("TOC-tidb-cloud.md", "./tmp/TOC.md");
copyDirectoryWithCustomContentSync(
"./tidb-cloud/",
Expand Down
69 changes: 69 additions & 0 deletions scripts/filterCloudInitFiles.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import * as fs from "fs";
import path from "path";
import { getAllCloudMdList } from "./utils.js";

// All Markdown paths collected from the cloud TOC files.
const allFilePaths = getAllCloudMdList();

// Set to store filtered file paths (a Set so repeated entries collapse).
const filePaths = new Set();

// Keep only local files that live outside the tidb-cloud folder.
for (const filePath of allFilePaths) {
  // Skip external links (starting with http/https)
  if (filePath.startsWith("http")) {
    continue;
  }

  // Skip anchor links (starting with #)
  if (filePath.startsWith("#")) {
    continue;
  }

  // Normalize away a leading "./" or "/" so the prefix check below and the
  // later path.join onto the tmp directory both see a plain relative path.
  // The replacement is a no-op for paths that are already relative.
  const cleanPath = filePath.replace(/^\.?\//, "");

  // Skip files in tidb-cloud folder
  if (cleanPath.startsWith("tidb-cloud/")) {
    continue;
  }

  filePaths.add(cleanPath);
}

// Root directory that receives the copied files.
const tmpDir = "tmp";

// Creates `dir` (including missing parents) unless something already exists
// at that path.
const ensureDir = (dir) => {
  if (fs.existsSync(dir)) {
    return;
  }
  fs.mkdirSync(dir, { recursive: true });
};

ensureDir(tmpDir);

// Running totals reported in the summary below.
let copiedCount = 0;
let skippedCount = 0;

// Mirror every selected file under tmpDir, keeping its relative path.
filePaths.forEach((relPath) => {
  const destination = path.join(tmpDir, relPath);
  ensureDir(path.dirname(destination));

  // A missing source file is reported and counted, not treated as an error.
  if (!fs.existsSync(relPath)) {
    console.log(`⚠ Skipped (not found): ${relPath}`);
    skippedCount += 1;
    return;
  }

  try {
    fs.copyFileSync(relPath, destination);
    console.log(`✓ Copied: ${relPath}`);
    copiedCount += 1;
  } catch (error) {
    // A failed copy is logged only; it is counted neither as copied nor skipped.
    console.error(`✗ Error copying ${relPath}: ${error.message}`);
  }
});

// Final report, one console.log call per line.
const summaryLines = [
  `\nSummary:`,
  `- Total files referenced: ${filePaths.size}`,
  `- Files copied: ${copiedCount}`,
  `- Files skipped: ${skippedCount}`,
  `- Files copied to: ${tmpDir}/`,
];
for (const line of summaryLines) {
  console.log(line);
}
3 changes: 2 additions & 1 deletion scripts/filterNonCloudDoc.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
copySingleFileSync,
copyFileWithCustomContentSync,
removeCustomContent,
CLOUD_TOC_LIST,
} from "./utils.js";

const contentHandler = (content = "") => {
Expand All @@ -17,7 +18,7 @@ const extractFilefromList = (
fileList.forEach((filePath = "") => {
if (
filePath.includes(`/tidb-cloud/`) ||
filePath.includes(`TOC-tidb-cloud.md`)
CLOUD_TOC_LIST.some((tocFile) => filePath.includes(tocFile))
) {
return;
}
Expand Down
66 changes: 14 additions & 52 deletions scripts/filterUpdateFiles.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as fs from "fs";
import path from "path";
import axios from "axios";
import { Octokit } from "octokit";
import { CLOUD_TOC_LIST, getAllCloudMdList } from "./utils.js";

const GH_TOKEN = process.env.GH_TOKEN || "";

Expand Down Expand Up @@ -92,68 +93,29 @@ const deleteFile = (targetFile) => {
}
};

/**
 * Reads a TOC markdown file and collects the targets of its `[text](path)`
 * links that end with ".md".
 *
 * A leading "./" or "/" is stripped from each target so the same file is
 * always recorded under one spelling.
 *
 * @param {string} tocPath - Path of the TOC file to read.
 * @returns {Set<string>} Normalized link targets; an empty Set when the file
 *   does not exist or when reading/parsing throws.
 */
const parseTOCFile = (tocPath) => {
  try {
    if (!fs.existsSync(tocPath)) {
      console.log(`TOC file not found: ${tocPath}`);
      return new Set();
    }

    const markdown = fs.readFileSync(tocPath, "utf8");

    // Capture group 2 of each match is the link target inside the parentheses.
    const mdTargets = [...markdown.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)]
      .map(([, , target]) => target)
      .filter((target) => target.endsWith(".md"))
      .map((target) => target.replace(/^\.?\//, ""));
    const collected = new Set(mdTargets);

    console.log(`Found ${collected.size} files in TOC: ${tocPath}`);
    if (collected.size > 0) {
      // Print a preview of at most five entries to keep the log short.
      const preview = Array.from(collected).slice(0, 5).join(", ");
      const remainder =
        collected.size > 5 ? `... and ${collected.size - 5} more` : "";
      console.log("Files in TOC:", preview, remainder);
    }
    return collected;
  } catch (error) {
    console.error(`Error parsing TOC file ${tocPath}:`, error);
    return new Set();
  }
};

// get the file list from the toc file
const getCloudTOCFiles = () => {
// check ./tmp/TOC-tidb-cloud.md first
const tmpTocPath = "./tmp/TOC-tidb-cloud.md";
const localTocPath = "TOC-tidb-cloud.md";
const tmpTocFiles = getAllCloudMdList([
"./tmp/TOC-tidb-cloud.md",
"./tmp/TOC-tidb-cloud-starter.md",
"./tmp/TOC-tidb-cloud-essential.md",
]);
const tocFiles = getAllCloudMdList(CLOUD_TOC_LIST);

let tocFiles = parseTOCFile(tmpTocPath);
// Convert to Set
const tmpTocFilesSet = new Set(tmpTocFiles);
const tocFilesSet = new Set(tocFiles);

// if not found in /tmp, check the current directory
if (tocFiles.size === 0) {
console.log(`No files found in ${tmpTocPath}, trying ${localTocPath}`);
tocFiles = parseTOCFile(localTocPath);
}
// Use tmpTocFiles if not empty, otherwise use tocFiles
const finalTocFiles = tmpTocFilesSet.size > 0 ? tmpTocFilesSet : tocFilesSet;

if (tocFiles.size === 0) {
if (finalTocFiles.size === 0) {
console.log(
"Warning: No TOC file found or no files in TOC. All .md files will be processed."
);
}

return tocFiles;
return finalTocFiles;
};

// filter the files in tmp directory by the toc file
Expand Down
23 changes: 22 additions & 1 deletion scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,34 @@ const filterLink = (srcList = []) => {
};

export const getAllMdList = (tocFile) => {
if (!fs.existsSync(tocFile)) {
console.log(`TOC file not found: ${tocFile}`);
return [];
}

const tocFileContent = fs.readFileSync(tocFile);
const mdAst = generateMdAstFromFile(tocFileContent);
const linkList = extractLinkNodeFromAst(mdAst);
const filteredLinkList = filterLink(linkList);
const filteredLinkList = filterLink(linkList).map((link) =>
link.replace(/^\.?\//, "")
);
return filteredLinkList;
};

// File names of the cloud TOC markdown files. Used as the default argument of
// getAllCloudMdList, so every listed TOC is read when no explicit list is given.
export const CLOUD_TOC_LIST = [
"TOC-tidb-cloud.md",
"TOC-tidb-cloud-essential.md",
"TOC-tidb-cloud-starter.md",
];

/**
 * Collects the Markdown paths listed by several TOC files into one list
 * without duplicates.
 *
 * @param {string[]} [tocFiles=CLOUD_TOC_LIST] - TOC files to read, in order.
 * @returns {string[]} Unique paths, ordered by first appearance across the
 *   given TOC files.
 */
export const getAllCloudMdList = (tocFiles = CLOUD_TOC_LIST) => {
  // A Set keeps insertion order, so the first occurrence of a path wins.
  const uniquePaths = new Set();
  for (const tocFile of tocFiles) {
    for (const mdPath of getAllMdList(tocFile)) {
      uniquePaths.add(mdPath);
    }
  }
  return Array.from(uniquePaths);
};

const checkDestDir = (destPath) => {
const dir = path.dirname(destPath);

Expand Down
Loading