Skip to content

Commit d200db6

Browse files
authored
feat(project): add query page (#9)
1 parent a616f13 commit d200db6

File tree

13 files changed

+325
-10
lines changed

13 files changed

+325
-10
lines changed

browser/src/store/index.ts

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
export * from "./scanner_store";
2-
export * from "./scanning_store";
2+
export * from "./scanning_store";
3+
export * from "./query_store";

browser/src/store/query_store.ts

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import { val, derive, Val, ReadonlyVal } from "value-enhancer";
2+
import { message } from "antd";
3+
import { fetchJson } from "../utils";
4+
5+
/**
 * Read-only reactive state published by {@link QueryStore} through its `$`
 * property.
 */
export type QueryStore$ = Readonly<{
  /** `true` while a query request started by `QueryStore.query` is pending. */
  isQuerying: ReadonlyVal<boolean>;
  /** Latest successfully fetched result; `null` until a query has succeeded. */
  queryResult: ReadonlyVal<QueryResult | null>;
}>;
9+
10+
/**
 * Payload returned by `GET /api/query`.
 */
export type QueryResult = Readonly<{
  /** Keywords reported by the backend for the submitted query text. */
  keywords: ReadonlyArray<string>;
  /** Matched items; each is either a PDF metadata hit or a PDF page hit. */
  items: ReadonlyArray<QueryItem>;
}>;
14+
15+
/**
 * A single query hit. The two variants carry no shared tag; callers in this
 * code base tell them apart by the presence of the `content` property, which
 * only {@link PDFPageItem} has.
 */
export type QueryItem =
  | PDFMetadataItem
  | PDFPageItem;
16+
17+
/**
 * Query hit that matched a PDF's metadata rather than the text of a page.
 */
export type PDFMetadataItem = Readonly<{
  /** Files this hit refers to (presumably file paths — TODO confirm against the API). */
  pdf_files: ReadonlyArray<string>;
  /** Match distance reported by the backend (presumably lower is closer — TODO confirm). */
  distance: number;
  /** Metadata fields of the PDF; a field is `null` when the backend has no value for it. */
  metadata: Readonly<{
    author: string | null;
    modified_at: string | null;
    producer: string | null;
  }>;
}>;
26+
27+
/**
 * Query hit that matched the text of a PDF page.
 */
export type PDFPageItem = Readonly<{
  /** Every (file, page) location this page content was found at. */
  pdf_files: ReadonlyArray<Readonly<{
    pdf_path: string;
    /** Zero-based page index; the UI displays it as `page_index + 1`. */
    page_index: number;
  }>>;
  /** Match distance reported by the backend (presumably lower is closer — TODO confirm). */
  distance: number;
  /** Text content of the page. */
  content: string;
  /** Highlight information for `content`. */
  segments: ReadonlyArray<HighlightSegment>;
  /** Annotations attached to the page, each with its own text and highlights. */
  annotations: ReadonlyArray<Readonly<{
    index: number;
    distance: number;
    content: string;
    segments: ReadonlyArray<HighlightSegment>;
  }>>;
}>;
42+
43+
/**
 * A span of text together with the ranges inside it that should be
 * highlighted.
 *
 * NOTE(review): `start`/`end` are presumably offsets into the owning
 * `content` string and each `highlights` entry a `[start, end]` pair; whether
 * the pairs are relative to the segment or to the whole content is not visible
 * here — confirm against the backend before rendering them.
 */
export type HighlightSegment = Readonly<{
  start: number;
  end: number;
  highlights: ReadonlyArray<[number, number]>;
}>;
48+
49+
/**
 * Reactive store behind the query page.
 *
 * Holds the "is a query running" flag and the latest query result, and exposes
 * both as read-only values through {@link QueryStore.$}.
 */
export class QueryStore {
  /** Read-only reactive state for consumers (e.g. React views). */
  public readonly $: QueryStore$;

  readonly #isQuerying$: Val<boolean> = val(false);
  readonly #queryResult$: Val<QueryResult | null> = val<QueryResult | null>(null);

  /**
   * Sequence number of the most recently issued query. Responses belonging to
   * an older number are stale and must not touch the store's state.
   */
  #latestRequestId = 0;

  public constructor() {
    this.$ = {
      isQuerying: derive(this.#isQuerying$),
      queryResult: derive(this.#queryResult$),
    };
  }

  /**
   * Sends `text` to `GET /api/query` and publishes the response.
   *
   * The call is fire-and-forget: failures are logged and shown to the user as
   * an antd message instead of being thrown. When several queries overlap,
   * only the most recently issued one may update `queryResult`, report an
   * error to the user, or clear `isQuerying`; this prevents a slow, older
   * response from overwriting a newer result or hiding the loading state of a
   * request that is still pending.
   *
   * @param text - Query text; it is URL-encoded before being sent.
   */
  public query(text: string): void {
    const requestId = ++this.#latestRequestId;
    const isLatest = (): boolean => requestId === this.#latestRequestId;
    const query = new URLSearchParams({
      query: text,
    });
    this.#isQuerying$.set(true);
    fetchJson<QueryResult>(`/api/query?${query}`)
      .then((queryResults) => {
        if (isLatest()) {
          this.#queryResult$.set(queryResults);
        }
      })
      .catch((error: unknown) => {
        console.error(error);
        if (isLatest()) {
          // A rejection value is not guaranteed to be an Error instance.
          message.error(error instanceof Error ? error.message : String(error));
        }
      })
      .finally(() => {
        if (isLatest()) {
          this.#isQuerying$.set(false);
        }
      });
  }
}

browser/src/views/App.tsx

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { useNavigate, BrowserRouter, Routes, Route } from "react-router-dom";
77
import { Button, Result, ConfigProvider } from "antd";
88
import { Navigator } from "./Navigator";
99
import { ScannerPage } from "./ScannerPage";
10+
import { QueryPage } from "./QueryPage";
1011

1112
export const App: React.FC<{}> = () => {
1213
return (
@@ -28,6 +29,7 @@ export const App: React.FC<{}> = () => {
2829
const AppRoutes: React.FC<{}> = () => (
2930
<Routes>
3031
<Route path="/scanner" element={<ScannerPage />} />
32+
<Route path="/query" element={<QueryPage/ >} />
3133
<Route path="*" element={<AppNotFound />} />
3234
</Routes>
3335
);

browser/src/views/Link.module.less

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.pdf-a {
2+
color: #FFFFFF;
3+
}
4+
5+
.pdf-page {
6+
color: #FFD4C5;
7+
margin-left: 8px;
8+
}

browser/src/views/Link.tsx

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import React from "react";
2+
import styles from "./Link.module.less";
3+
4+
import { Tag, Tooltip } from "antd";
5+
import { FilePdfFilled } from "@ant-design/icons";
6+
7+
/** Props of {@link PDFTagLink}. */
export type PDFTagLinkProps = Readonly<{
  /** Path of the PDF file; both `/` and `\` are accepted as separators. */
  path: string;
  /** Zero-based page index; it is displayed as `page + 1`. */
  page: number;
}>;
11+
12+
/**
 * Tag that links to a PDF file on disk and shows the one-based page number.
 *
 * The label is the file name without its extension; the full path is shown in
 * a tooltip. Renders nothing when no file name can be extracted from `path`
 * (empty path, or a path ending in a separator).
 */
export const PDFTagLink: React.FC<PDFTagLinkProps> = ({ path, page }) => {
  // Split on both POSIX and Windows separators and keep the last component.
  const fileNames = path.split(/[/\\]/);
  const fileName = fileNames[fileNames.length - 1];
  if (!fileName) {
    return null;
  }
  const fileNameWithoutExt = fileName.replace(/\.[^.]+$/, "");
  // `%`, `#` and `?` are legal in file names but would be parsed as an escape
  // sequence, a fragment and a query string inside a URL, so the link would
  // point at a different (truncated) path. Percent-encode them.
  const href = `file://${path.replace(/[%#?]/g, (char) => encodeURIComponent(char))}`;
  return (
    <Tag
      icon={<FilePdfFilled color="#FFFFFF" />}
      color="#FF5502">
      <Tooltip title={path}>
        <a
          className={styles["pdf-a"]}
          href={href}
          download>
          {fileNameWithoutExt}
        </a>
      </Tooltip>
      <span className={styles["pdf-page"]}>
        {page + 1}
      </span>
    </Tag>
  );
};

browser/src/views/Navigator.tsx

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ export const Navigator: React.FC<{}> = () => {
2323
label: "知识库",
2424
},
2525
{
26-
key: "searcher",
27-
label: "搜索",
26+
key: "query",
27+
label: "查询",
2828
},
2929
]} />
3030
</div>
browser/src/views/QueryPage.module.less

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
.query-box {
2+
display: flex;
3+
flex-direction: column;
4+
align-items: stretch;
5+
padding-top: 48px;
6+
padding-bottom: 24px;
7+
}
8+
9+
.query-result-box {
10+
overflow-y: scroll;
11+
flex-shrink: 1;
12+
flex-grow: 1;
13+
}
14+
15+
.keywords-bar {
16+
display: flex;
17+
margin-top: 12px;
18+
margin-bottom: 24px;
19+
flex-direction: row;
20+
align-items: center;
21+
22+
> label {
23+
font-size: 12px;
24+
margin-right: 15px;
25+
color: #8C8C8C;
26+
}
27+
}
28+
29+
.empty {
30+
margin-top: 96px;
31+
}
32+
33+
.pdf-page-card {
34+
border: 1px solid #E5E5E5;
35+
border-radius: 4px;
36+
padding: 24px;
37+
margin-top: 12px;
38+
margin-bottom: 18px;
39+
margin-left: 4px;
40+
margin-right: 4px;
41+
box-shadow: 2px 2px 2.5px #E5E5E5;
42+
}
43+
44+
.text {
45+
font-size: 14px;
46+
color: #8C8C8C;
47+
margin-top: 35px;
48+
}

browser/src/views/QueryPage.tsx

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import React from "react";
2+
import styles from "./QueryPage.module.less";
3+
4+
import { Tag, Empty, Skeleton, Input, Divider, Descriptions } from "antd";
5+
import { useVal } from "use-value-enhancer";
6+
import { PDFPageItem, QueryResult, QueryStore } from "../store";
7+
import { PDFTagLink } from "./Link";
8+
import { Text } from "./Text";
9+
10+
const { Search } = Input;
11+
12+
/**
 * Query page: a search box, followed by either a loading skeleton while a
 * query is running or the result of the last successful query.
 */
export const QueryPage: React.FC<{}> = () => {
  const store = React.useMemo(() => new QueryStore(), []);
  const isQuerying = useVal(store.$.isQuerying);
  const queryResults = useVal(store.$.queryResult);
  const onSearch = React.useCallback(
    (query: string) => {
      // antd's Search also calls `onSearch` with an empty string when the
      // clear icon is clicked; a blank query has nothing to search for, so
      // it is not sent to the backend.
      if (query.trim() === "") {
        return;
      }
      store.query(query);
    },
    [store],
  );
  let tailView: React.ReactNode = null;

  if (isQuerying) {
    tailView = <Skeleton active />;
  } else if (queryResults) {
    tailView = <ResultDisplay result={queryResults} />;
  }
  return (
    <div className={styles["query-box"]}>
      <Search
        placeholder="输入你要搜索的内容"
        allowClear
        onSearch={onSearch} />
      {tailView && <>
        <Divider />
        {tailView}
      </>}
    </div>
  );
};
40+
41+
/** Props of `ResultDisplay`. */
type ResultDisplayProps = Readonly<{
  /** The query result to render. */
  result: QueryResult;
}>;
44+
45+
/**
 * Renders a query result: the keyword bar, then one card per PDF page hit, or
 * an empty-state placeholder when there is nothing to show.
 */
const ResultDisplay: React.FC<ResultDisplayProps> = ({ result }) => {
  const { keywords, items } = result;
  // TODO: support hits on the PDF metadata itself. Until then only page hits
  // (the variant that carries `content`) are rendered. Filtering up front lets
  // the empty state also cover results that contain metadata hits only, which
  // would otherwise leave the area blank.
  const pageItems = items.filter(
    (item): item is PDFPageItem => "content" in item,
  );
  return (
    <div className={styles["query-result-box"]}>
      <div className={styles["keywords-bar"]}>
        <label>关键词：</label>
        {keywords.map((keyword, index) => (
          <Tag key={`${index}`}>{keyword}</Tag>
        ))}
      </div>
      {pageItems.length === 0 && (
        <Empty
          className={styles.empty}
          description="没有搜索到内容" />
      )}
      {pageItems.map((item, index) => (
        <PDFPageCard key={`${index}`} item={item} />
      ))}
    </div>
  );
};
72+
73+
/** Props of `PDFPageCard`. */
type PDFPageCardProps = Readonly<{
  /** The PDF page hit to render. */
  item: PDFPageItem;
}>;
76+
77+
/**
 * Card for one PDF page hit: the files/pages it was found in, its distance,
 * and the page text.
 */
const PDFPageCard: React.FC<PDFPageCardProps> = ({ item }) => {
  // One tag link per (file, page) location of this page content.
  const fileLinks = item.pdf_files.map((file, position) => (
    <PDFTagLink
      key={`${position}`}
      path={file.pdf_path}
      page={file.page_index} />
  ));
  const descriptionItems = [
    { key: "1", label: "文件", children: fileLinks },
    { key: "2", label: "距离", children: item.distance },
  ];
  return (
    <div className={styles["pdf-page-card"]}>
      <Descriptions layout="vertical" items={descriptionItems} />
      <Text
        className={styles.text}
        content={item.content}
        segments={item.segments} />
    </div>
  );
};

browser/src/views/Text.tsx

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import React from "react";
2+
3+
import { HighlightSegment } from "../store";
4+
5+
/**
 * Props of {@link Text}: any `<div>` attribute, plus the text to show and its
 * highlight segments.
 */
export type TextProps = React.HTMLAttributes<HTMLDivElement> & Readonly<{
  /** The text to display. */
  content: string;
  /** Highlight information for `content`. */
  segments: ReadonlyArray<HighlightSegment>;
}>;
9+
10+
/**
 * Displays `content` inside a `<div>`, forwarding all remaining props to it.
 *
 * `segments` is accepted but not rendered yet; the text is shown without
 * highlights.
 */
export const Text: React.FC<TextProps> = (props) => {
  // `segments` is split off so that it is not forwarded to the DOM element.
  const { content, segments, ...divProps } = props;
  return <div {...divProps}>{content}</div>;
};

index_package/service/service.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import threading
23

34
from typing import Optional
45
from .service_in_thread import ServiceInThread, QueryResult
@@ -9,13 +10,14 @@
910
from ..progress_events import ProgressEventListener
1011
from ..utils import ensure_dir, ensure_parent_dir
1112

13+
_service_in_thread = threading.local()
14+
1215
class Service:
1316
def __init__(
1417
self,
1518
workspace_path: str,
1619
embedding_model_id: str,
1720
):
18-
self._service_in_thread: Optional[ServiceInThread] = None
1921
self._scan_db_path: str = ensure_parent_dir(
2022
os.path.abspath(os.path.join(workspace_path, "scanner.sqlite3"))
2123
)
@@ -57,12 +59,15 @@ def scan_job(self, max_workers: int = 1, progress_event_listener: Optional[Progr
5759
create_service=lambda scope: self._create_service_in_thread(scope),
5860
)
5961

62+
# TODO: 这会导致无法释放。要彻底解决,需要迁移 sqlite pool 的逻辑。
6063
def _get_service_in_thread(self) -> ServiceInThread:
61-
if self._service_in_thread is None:
64+
service_in_thread = getattr(_service_in_thread, "value", None)
65+
if service_in_thread is None:
6266
scanner = Scanner(self._scan_db_path)
63-
self._service_in_thread = self._create_service_in_thread(scanner.scope)
67+
service_in_thread = self._create_service_in_thread(scanner.scope)
68+
setattr(_service_in_thread, "value", service_in_thread)
6469

65-
return self._service_in_thread
70+
return service_in_thread
6671

6772
def _create_service_in_thread(self, scope: Scope) -> ServiceInThread:
6873
return ServiceInThread(

server/launcher.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,4 @@ def _load_port():
4242

4343
def _launch_browser(port: int):
4444
time.sleep(0.85)
45-
webbrowser.open(f"http://localhost:{port}/scanner")
45+
webbrowser.open(f"http://localhost:{port}/query")

0 commit comments

Comments
 (0)