Skip to content

v8.5 refactor: consolidate TOC file handling in cloud document scripts (#21512) #21514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
2 changes: 1 addition & 1 deletion scripts/concatMdByToc.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ const main = () => {
let mergedStr = "";

fileList.forEach((filePath) => {
mergedStr += `${handleSingleMd(`.${filePath}`)}\n\n`;
mergedStr += `${handleSingleMd(`./${filePath}`)}\n\n`;
});

const variables = JSON.parse(
Expand Down
6 changes: 3 additions & 3 deletions scripts/filterCloudDoc.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {
getAllMdList,
getAllCloudMdList,
copySingleFileSync,
copyFileWithCustomContentSync,
copyDirectoryWithCustomContentSync,
Expand All @@ -25,9 +25,9 @@ const extractFilefromList = (
};

const main = () => {
const filteredLinkList = getAllMdList("TOC-tidb-cloud.md");
const allFilePaths = getAllCloudMdList();

extractFilefromList(filteredLinkList, ".", "./tmp");
extractFilefromList(allFilePaths, "./", "./tmp");
copySingleFileSync("TOC-tidb-cloud.md", "./tmp/TOC.md");
copyDirectoryWithCustomContentSync(
"./tidb-cloud/",
Expand Down
69 changes: 69 additions & 0 deletions scripts/filterCloudInitFiles.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import * as fs from "fs";
import path from "path";
import { getAllCloudMdList } from "./utils.js";

const allFilePaths = getAllCloudMdList();

// Set to store filtered file paths
const filePaths = new Set();

// Filter the file paths
for (const filePath of allFilePaths) {
// Skip external links (starting with http/https)
if (filePath.startsWith("http")) {
continue;
}

// Skip anchor links (starting with #)
if (filePath.startsWith("#")) {
continue;
}

// Skip files in tidb-cloud folder
if (cleanPath.startsWith("tidb-cloud/")) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The variable cleanPath is used here but it has not been defined in this scope, which will cause a ReferenceError at runtime. Based on the context of the loop, you likely intended to use filePath.

Suggested change
if (cleanPath.startsWith("tidb-cloud/")) {
if (filePath.startsWith("tidb-cloud/")) {

continue;
}

filePaths.add(cleanPath);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

Similar to the issue above, the undefined variable cleanPath is used here. It should be filePath to add the correct path to the filePaths set.

Suggested change
filePaths.add(cleanPath);
filePaths.add(filePath);

}

// Create tmp directory if it doesn't exist
const tmpDir = "tmp";
if (!fs.existsSync(tmpDir)) {
  fs.mkdirSync(tmpDir, { recursive: true });
}

// Copy every path in filePaths into tmpDir, keeping its relative location.
// Three outcomes are tracked so the summary accounts for every referenced file:
// copied, skipped (source missing on disk), and failed (copy threw).
let copiedCount = 0;
let skippedCount = 0;
let failedCount = 0;

for (const filePath of filePaths) {
  const sourcePath = filePath;
  const targetPath = path.join(tmpDir, filePath);

  // Create target directory if it doesn't exist
  const targetDir = path.dirname(targetPath);
  if (!fs.existsSync(targetDir)) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  // A referenced path that is absent on disk is reported and skipped, not treated as an error.
  if (!fs.existsSync(sourcePath)) {
    console.log(`⚠ Skipped (not found): ${filePath}`);
    skippedCount++;
    continue;
  }

  try {
    fs.copyFileSync(sourcePath, targetPath);
    console.log(`✓ Copied: ${filePath}`);
    copiedCount++;
  } catch (error) {
    // Keep processing the remaining files, but count the failure so it is
    // visible in the summary instead of silently disappearing from the totals.
    console.error(`✗ Error copying ${filePath}: ${error.message}`);
    failedCount++;
  }
}

console.log(`\nSummary:`);
console.log(`- Total files referenced: ${filePaths.size}`);
console.log(`- Files copied: ${copiedCount}`);
console.log(`- Files skipped: ${skippedCount}`);
console.log(`- Files failed: ${failedCount}`);
console.log(`- Files copied to: ${tmpDir}/`);
3 changes: 2 additions & 1 deletion scripts/filterNonCloudDoc.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
copySingleFileSync,
copyFileWithCustomContentSync,
removeCustomContent,
CLOUD_TOC_LIST,
} from "./utils.js";

const contentHandler = (content = "") => {
Expand All @@ -17,7 +18,7 @@ const extractFilefromList = (
fileList.forEach((filePath = "") => {
if (
filePath.includes(`/tidb-cloud/`) ||
filePath.includes(`TOC-tidb-cloud.md`)
CLOUD_TOC_LIST.some((tocFile) => filePath.includes(tocFile))
) {
return;
}
Expand Down
66 changes: 14 additions & 52 deletions scripts/filterUpdateFiles.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as fs from "fs";
import path from "path";
import axios from "axios";
import { Octokit } from "octokit";
import { CLOUD_TOC_LIST, getAllCloudMdList } from "./utils.js";

const GH_TOKEN = process.env.GH_TOKEN || "";

Expand Down Expand Up @@ -92,68 +93,29 @@ const deleteFile = (targetFile) => {
}
};

/**
 * Read a TOC markdown file and collect the `.md` targets of its links.
 *
 * @param {string} tocPath - Path of the TOC file to read.
 * @returns {Set<string>} Link targets ending in `.md`, with a leading `./` or `/`
 *   stripped. An empty set is returned when the file is missing or reading fails.
 */
const parseTOCFile = (tocPath) => {
  try {
    if (!fs.existsSync(tocPath)) {
      console.log(`TOC file not found: ${tocPath}`);
      return new Set();
    }

    const tocText = fs.readFileSync(tocPath, "utf8");
    const collected = new Set();

    // Each match is a markdown link `[text](target)`; only the target is needed.
    for (const [, , target] of tocText.matchAll(/\[([^\]]+)\]\(([^)]+)\)/g)) {
      if (!target.endsWith(".md")) {
        continue;
      }
      // Drop a leading "./" or "/" so the same file always yields the same key.
      collected.add(target.replace(/^\.?\//, ""));
    }

    console.log(`Found ${collected.size} files in TOC: ${tocPath}`);
    if (collected.size > 0) {
      const preview = Array.from(collected).slice(0, 5).join(", ");
      const remainder =
        collected.size > 5 ? `... and ${collected.size - 5} more` : "";
      console.log("Files in TOC:", preview, remainder);
    }
    return collected;
  } catch (error) {
    console.error(`Error parsing TOC file ${tocPath}:`, error);
    return new Set();
  }
};

// get the file list from the toc file
const getCloudTOCFiles = () => {
// check ./tmp/TOC-tidb-cloud.md first
const tmpTocPath = "./tmp/TOC-tidb-cloud.md";
const localTocPath = "TOC-tidb-cloud.md";
const tmpTocFiles = getAllCloudMdList([
"./tmp/TOC-tidb-cloud.md",
"./tmp/TOC-tidb-cloud-starter.md",
"./tmp/TOC-tidb-cloud-essential.md",
]);
const tocFiles = getAllCloudMdList(CLOUD_TOC_LIST);
Comment on lines 97 to +103

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

low

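The getCloudTOCFiles function reads TOC files from both the tmp directory and the current directory, and the tmp paths are hard-coded rather than derived from CLOUD_TOC_LIST. To improve maintainability and reduce redundancy, build the tmp paths from CLOUD_TOC_LIST and pass both sets of paths to getAllCloudMdList in a single call.

This simplifies the function and keeps the list of TOC files in one place. Note that the snippet below merges the entries from the tmp and local TOC files, whereas the current code uses the local TOC files only when the tmp ones yield no entries; keep the explicit fallback if that precedence matters.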

const getCloudTOCFiles = () => {
  const tmpTocFiles = CLOUD_TOC_LIST.map((tocFile) => `./tmp/${tocFile}`);
  const allTocFiles = [...tmpTocFiles, ...CLOUD_TOC_LIST];
  const finalTocFiles = new Set(getAllCloudMdList(allTocFiles));

  if (finalTocFiles.size === 0) {
    console.log("Warning: No TOC file found or no files in TOC. All .md files will be processed.");
  }

  return finalTocFiles;
};


let tocFiles = parseTOCFile(tmpTocPath);
// Convert to Set
const tmpTocFilesSet = new Set(tmpTocFiles);
const tocFilesSet = new Set(tocFiles);

// if not found in /tmp, check the current directory
if (tocFiles.size === 0) {
console.log(`No files found in ${tmpTocPath}, trying ${localTocPath}`);
tocFiles = parseTOCFile(localTocPath);
}
// Use tmpTocFiles if not empty, otherwise use tocFiles
const finalTocFiles = tmpTocFilesSet.size > 0 ? tmpTocFilesSet : tocFilesSet;

if (tocFiles.size === 0) {
if (finalTocFiles.size === 0) {
console.log(
"Warning: No TOC file found or no files in TOC. All .md files will be processed."
);
}

return tocFiles;
return finalTocFiles;
};

// filter the files in tmp directory by the toc file
Expand Down
23 changes: 22 additions & 1 deletion scripts/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,34 @@ const filterLink = (srcList = []) => {
};

export const getAllMdList = (tocFile) => {
if (!fs.existsSync(tocFile)) {
console.log(`TOC file not found: ${tocFile}`);
return [];
}

const tocFileContent = fs.readFileSync(tocFile);
const mdAst = generateMdAstFromFile(tocFileContent);
const linkList = extractLinkNodeFromAst(mdAst);
const filteredLinkList = filterLink(linkList);
const filteredLinkList = filterLink(linkList).map((link) =>
link.replace(/^\.?\//, "")
);
return filteredLinkList;
};

// File names of the cloud TOC files. getAllCloudMdList reads these when it is
// called without an explicit list of TOC files.
export const CLOUD_TOC_LIST = [
"TOC-tidb-cloud.md",
"TOC-tidb-cloud-essential.md",
"TOC-tidb-cloud-starter.md",
];

/**
 * Collect the markdown paths referenced by several TOC files.
 *
 * @param {string[]} [tocFiles=CLOUD_TOC_LIST] - TOC files to read, each passed to getAllMdList.
 * @returns {string[]} All returned paths in first-seen order, with duplicates removed.
 */
export const getAllCloudMdList = (tocFiles = CLOUD_TOC_LIST) => {
  // flatMap concatenates the per-TOC lists; the Set drops paths listed by more than one TOC.
  const everyPath = tocFiles.flatMap((tocFile) => getAllMdList(tocFile));
  return Array.from(new Set(everyPath));
};

const checkDestDir = (destPath) => {
const dir = path.dirname(destPath);

Expand Down
Loading