Skip to content

Commit 522e1c0

Browse files
committed
Refactor to a 2-pass system
1 parent 308bcb2 commit 522e1c0

7 files changed

+442
-344
lines changed

.eslintrc.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ module.exports = {
2727
"@typescript-eslint/ban-ts-comment": "off",
2828
"@typescript-eslint/no-explicit-any": "off",
2929
"@typescript/explicit-module-boundary-types": "off",
30-
"@typescript/no-unsafe-call": "off",
30+
"@typescript-eslint/no-unsafe-call": "off",
3131
},
3232
};

src/FlatGuidLayoutStrategy.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { LayoutStrategy } from "./LayoutStrategy";
2+
import { NotionPage } from "./NotionPage";
23

34
// This strategy creates a flat list of files that have notion-id for file names.
45
// Pros: the urls will never change so long as the notion pages are not deleted and re-created.
@@ -11,19 +12,18 @@ import { LayoutStrategy } from "./LayoutStrategy";
1112
// the directory/file structure itself is no longer representative of the outline we want.
1213
export class FlatGuidLayoutStrategy extends LayoutStrategy {
1314
// eslint-disable-next-line @typescript-eslint/no-unused-vars
14-
public newLevel(context: string, _levelLabel: string): string {
15+
public newLevel(
16+
rootDir: string,
17+
context: string,
18+
_levelLabel: string
19+
): string {
1520
// In this strategy, we ignore context and don't create any directories to match the levels.
1621
// Just return the following for the benefit of logging.
1722
return context + "/" + _levelLabel;
1823
}
1924

20-
public getPathForPage(
21-
_context: string,
22-
pageId: string,
23-
_title: string,
24-
extensionWithDot: string
25-
): string {
25+
public getPathForPage(page: NotionPage, extensionWithDot: string): string {
2626
// In this strategy, we don't care about the location or the title
27-
return this.rootDirectory + "/" + pageId + extensionWithDot;
27+
return this.rootDirectory + "/" + page.pageId + extensionWithDot;
2828
}
2929
}
Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import * as fs from "fs-extra";
22
import sanitize from "sanitize-filename";
33
import { LayoutStrategy } from "./LayoutStrategy";
4+
import { NotionPage } from "./NotionPage";
45

56
// This strategy gives us a file tree that mirrors that of notion.
67
// Each level in the outline becomes a directory, and each file bears the name of the Notion document.
@@ -13,25 +14,31 @@ import { LayoutStrategy } from "./LayoutStrategy";
1314
// doesn't buy us much... it would give protection against name changes, but not changes to the outline structure.
1415

1516
export class HierarchicalNamedLayoutStrategy extends LayoutStrategy {
16-
public newLevel(context: string, levelLabel: string): string {
17+
public newLevel(
18+
dirRoot: string,
19+
context: string,
20+
levelLabel: string
21+
): string {
1722
const path = context + "/" + sanitize(levelLabel);
18-
fs.mkdirSync(path, { recursive: true });
23+
24+
//console.log("Creating level " + path);
25+
fs.mkdirSync(dirRoot + "/" + path, { recursive: true });
1926
return path;
2027
}
2128

22-
public getPathForPage(
23-
context: string,
24-
pageId: string,
25-
title: string,
26-
extensionWithDot: string
27-
): string {
29+
public getPathForPage(page: NotionPage, extensionWithDot: string): string {
2830
let path =
29-
this.rootDirectory + "/" + context + sanitize(title) + extensionWithDot;
31+
this.rootDirectory +
32+
"/" +
33+
page.context +
34+
"/" +
35+
sanitize(page.nameOrTitle) +
36+
extensionWithDot;
3037

3138
path = path.replace("//", "/");
32-
console.log(
33-
`getPathForPage(${context}, ${pageId}, ${title}) with root ${this.rootDirectory} --> ${path}`
34-
);
39+
// console.log(
40+
// `getPathForPage(${context}, ${pageId}, ${title}) with root ${this.rootDirectory} --> ${path}`
41+
// );
3542
return path;
3643
}
3744
}

src/LayoutStrategy.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import * as fs from "fs-extra";
2+
import { NotionPage } from "./NotionPage";
23

34
// Here a fuller name would be File Tree Layout Strategy. That is,
45
// as we walk the Notion outline and create files, where do we create them, what do we name them, etc.
@@ -20,16 +21,18 @@ export abstract class LayoutStrategy {
2021
}
2122
}
2223

23-
public abstract newLevel(context: string, levelLabel: string): string;
24+
public abstract newLevel(
25+
rootDir: string,
26+
ontext: string,
27+
levelLabel: string
28+
): string;
2429
public abstract getPathForPage(
25-
context: string,
26-
pageId: string,
27-
title: string,
30+
page: NotionPage,
2831
extensionWithDot: string
2932
): string;
3033

31-
public pageWasSeen(context: string, pageId: string, title: string): void {
32-
const path = this.getPathForPage(context, pageId, title, ".md");
34+
public pageWasSeen(page: NotionPage): void {
35+
const path = this.getPathForPage(page, ".md");
3336
this.existingPagesNotSeenYetInPull =
3437
this.existingPagesNotSeenYetInPull.filter(p => p !== path);
3538
}

src/NotionImage.ts

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import * as fs from "fs-extra";
2+
import FileType from "file-type";
3+
import fetch from "node-fetch";
4+
5+
let existingImagesNotSeenYetInPull: string[] = [];
6+
let imageOutputPath = "not set yet";
7+
let imagePrefix = "not set yet";
8+
9+
export async function initImageHandling(
10+
prefix: string,
11+
outputPath: string
12+
): Promise<void> {
13+
// If they gave us a trailing slash, remove it because we add it back later.
14+
// Note that it's up to the caller to have a *leading* slash or not.
15+
imagePrefix = prefix.replace(/\/$/, "");
16+
imageOutputPath = outputPath;
17+
18+
console.log("initimg " + imagePrefix + ", " + imageOutputPath);
19+
// Currently we don't delete the image directory, because if an image
20+
// changes, it gets a new id. This way we can avoid downloading
20+
// an image after the 1st time. The downside is currently we don't
22+
// have the smarts to remove unused images.
23+
await fs.mkdir(imageOutputPath, { recursive: true });
24+
}
25+
26+
async function saveImage(
27+
url: string,
28+
imageFolderPath: string
29+
): Promise<string> {
30+
const response = await fetch(url);
31+
const arrayBuffer = await response.arrayBuffer();
32+
const buffer = Buffer.from(arrayBuffer);
33+
const fileType = await FileType.fromBuffer(buffer);
34+
if (fileType?.ext) {
35+
// Since most images come from pasting screenshots, there isn't normally a filename. That's fine, we just make a hash of the url
36+
// Images that are stored by notion come to us with a complex url that changes over time, so we pick out the UUID that doesn't change. Example:
37+
// https://s3.us-west-2.amazonaws.com/secure.notion-static.com/d1058f46-4d2f-4292-8388-4ad393383439/Untitled.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=AKIAT73L2G45EIPT3X45%2F20220516%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20220516T233630Z&X-Amz-Expires=3600&X-Amz-Signature=f215704094fcc884d37073b0b108cf6d1c9da9b7d57a898da38bc30c30b4c4b5&X-Amz-SignedHeaders=host&x-id=GetObject
38+
39+
let thingToHash = url;
40+
const m = /.*secure\.notion-static\.com\/(.*)\//gm.exec(url);
41+
if (m && m.length > 1) {
42+
thingToHash = m[1];
43+
}
44+
45+
const hash = hashOfString(thingToHash);
46+
const outputFileName = `${hash}.${fileType.ext}`;
47+
const path = imageFolderPath + "/" + outputFileName;
48+
imageWasSeen(path);
49+
if (!fs.pathExistsSync(path)) {
50+
// I think it is ok that this is writing async as we continue
51+
console.log("Adding image " + path);
52+
fs.createWriteStream(path).write(buffer);
53+
}
54+
return outputFileName;
55+
} else {
56+
console.error(
57+
`Something wrong with the filetype extension on the blob we got from ${url}`
58+
);
59+
return "error";
60+
}
61+
}
62+
function hashOfString(s: string) {
63+
let hash = 0;
64+
for (let i = 0; i < s.length; ++i)
65+
hash = Math.imul(31, hash) + s.charCodeAt(i);
66+
67+
return Math.abs(hash);
68+
}
69+
70+
// Download the image if we don't have it, give it a good name, and
71+
// change the src to point to our copy of the image.
72+
export async function processImageBlock(b: any): Promise<void> {
73+
let url = "";
74+
if ("file" in b.image) {
75+
url = b.image.file.url; // image saved on notion (actually AWS)
76+
} else {
77+
url = b.image.external.url; // image still pointing somewhere else. I've seen this happen when copying a Google Doc into Notion. Notion kept pointing at the google doc.
78+
}
79+
80+
const newPath = imagePrefix + "/" + (await saveImage(url, imageOutputPath));
81+
82+
// change the src to point to our copy of the image
83+
if ("file" in b.image) {
84+
b.image.file.url = newPath;
85+
} else {
86+
b.image.external.url = newPath;
87+
}
88+
}
89+
90+
function imageWasSeen(path: string) {
91+
existingImagesNotSeenYetInPull = existingImagesNotSeenYetInPull.filter(
92+
p => p !== path
93+
);
94+
}
95+
96+
export async function cleanupOldImages(): Promise<void> {
97+
for (const p of existingImagesNotSeenYetInPull) {
98+
console.log(`Removing old image: ${p}`);
99+
await fs.rm(p);
100+
}
101+
}

0 commit comments

Comments
 (0)