6
6
7
7
from salinic .field import Field
8
8
from salinic .query import SearchQuery
9
- from salinic .schema import Document , Folder , Page
9
+ from salinic .schema import DocumentPage , Folder , PaginatedResponse
10
10
from salinic .utils import first
11
11
12
12
logger = logging .getLogger (__name__ )
@@ -21,129 +21,47 @@ def search(
21
21
self ,
22
22
sq : SearchQuery ,
23
23
user_id : str | None = None
24
- ) -> list [Document | Folder ]:
25
- """Query index
26
-
27
- Solr results are grouped by `document_id` field: this way
28
- all folder entries will be part of group with `document_id=null`,
29
- while all page entities will be grouped per document i.e.
30
- pages which belong together are all in the same group.
31
-
32
- {
33
- "responseHeader":{
34
- ...
35
- "grouped":{
36
- "document_id":{
37
- "matches":26,
38
- "groups":[
39
- "groupValue":null,
40
- "doclist":{"numFound":4,"start":0,"numFoundExact":true,"docs":[
41
- {
42
- "id":"0b663599-32b1-4396-8dbe-ae7cd327cec6",
43
- "lang":"en",
44
- "user_id":"4cee7c39-7c34-4cc5-8543-42a8c88c9fe6",
45
- "entity_type":"folder",
46
- "title_txt_en":"A2 updated",
47
- "_version_":1801539995817738240},
48
- {
49
- "id":"768c6841-d37a-4d02-857f-ab7eaf69b27e",
50
- "lang":"en",
51
- "user_id":"4cee7c39-7c34-4cc5-8543-42a8c88c9fe6",
52
- "entity_type":"folder",
53
- "title_txt_en":".inbox",
54
- "_version_":1801539995692957696}]
55
- }},
56
- {
57
- "groupValue":"9bc57688-302e-4e1f-840a-c747dcccb362",
58
- "doclist":{"numFound":5,"start":0,"numFoundExact":true,"docs":[
59
- {
60
- "id":"a6e4916f-dea6-414b-aa38-f5b9ea375725",
61
- "document_id":"9bc57688-302e-4e1f-840a-c747dcccb362",
62
- "lang":"en",
63
- "user_id":"4cee7c39-7c34-4cc5-8543-42a8c88c9fe6",
64
- "page_number":1,
65
- "entity_type":"page",
66
- "title_txt_en":"brother_004603.pdf",
67
- "_version_":1801539996374532096},
68
- {
69
- "id":"72f6ca9e-af4b-4235-a56c-a62508e24efe",
70
- "document_id":"9bc57688-302e-4e1f-840a-c747dcccb362",
71
- "lang":"en",
72
- "user_id":"4cee7c39-7c34-4cc5-8543-42a8c88c9fe6",
73
- "page_number":2,
74
- "entity_type":"page",
75
- "title_txt_en":"brother_004603.pdf",
76
- "_version_":1801539996403892224},]
77
- }},
78
- {
79
- "groupValue":"200b0201-cfcd-43df-b41f-f1732568a0d2",
80
- "doclist":{"numFound":2,"start":0,"numFoundExact":true,"docs":[
81
- {
82
- "id":"9fa936e6-fe94-46bf-ad01-d8591cc290d4",
83
- "document_id":"200b0201-cfcd-43df-b41f-f1732568a0d2",
84
- "lang":"en",
85
- "user_id":"4cee7c39-7c34-4cc5-8543-42a8c88c9fe6",
86
- "page_number":1,
87
- "entity_type":"page",
88
- "title_txt_en":"brother_004598.pdf",
89
- "_version_":1801539995874361344},
90
- {
91
- "id":"c364994c-eab5-4c6a-842a-6f40537f7a2e",
92
- "document_id":"200b0201-cfcd-43df-b41f-f1732568a0d2",
93
- "lang":"en",
94
- "user_id":"4cee7c39-7c34-4cc5-8543-42a8c88c9fe6",
95
- "page_number":2,
96
- "entity_type":"page",
97
- "title_txt_en":"brother_004598.pdf",
98
- "_version_":1801539995910012928}]
99
- }},
100
- }}]}}}
101
- """
24
+ ) -> PaginatedResponse :
25
+ """Query index"""
102
26
result = self .client .search (sq , user_id )
103
- grouped = glom (result , 'grouped.document_id' )
104
- if glom (grouped , 'matches' ) == 0 :
105
- return []
106
-
107
- result = []
108
- for group in glom (grouped , 'groups' ):
109
- if glom (group , 'groupValue' ):
110
- # groupValue != null => document
111
- document_id = glom (group , 'groupValue' )
112
- title = ''
113
- lang = 'en'
114
- tags = []
115
- pages = []
116
- for page in glom (group , 'doclist.docs' ):
117
- lang = page .get ('lang' , 'en' )
118
- title = page .get (f'title_txt_{ lang } ' , None )
119
- text = page .get (f'text_txt_{ lang } ' , None )
120
- tags = page .get ('tags' , [])
121
- p = Page (
122
- id = page ['id' ],
123
- page_number = page ['page_number' ],
124
- text = text
125
- )
126
- pages .append (p )
127
- item = Document (
128
- id = document_id ,
27
+ items = glom (result , 'response.docs' )
28
+ total_found = glom (result , 'response.numFound' )
29
+ start = glom (result , 'response.start' )
30
+ page_number = int (start / sq .page_size ) + 1
31
+ num_pages = int (total_found / sq .page_size ) + 1
32
+ returned_list = []
33
+
34
+ for item in items :
35
+ if document_id := item .get ('document_id' , None ):
36
+ lang = item .get ('lang' , 'en' )
37
+ title = item .get (f'title_txt_{ lang } ' , lang )
38
+ tags = item .get ('tags' , [])
39
+ dp = DocumentPage (
40
+ id = item ['id' ],
41
+ page_number = item ['page_number' ],
42
+ document_id = document_id ,
129
43
title = title ,
130
44
lang = lang ,
131
- pages = pages ,
132
- tags = tags ,
45
+ tags = tags
133
46
)
134
- result .append (item )
47
+ returned_list .append (dp )
135
48
else :
136
- for folder in glom (group , 'doclist.docs' ):
137
- lang = folder .get ('lang' , 'en' )
138
- title = folder .get (f'title_txt_{ lang } ' , None )
139
- item = Folder (
140
- id = folder ['id' ],
141
- title = title ,
142
- tags = folder .get ('tags' , []),
143
- )
144
- result .append (item )
145
-
146
- return result
49
+ lang = item .get ('lang' , 'en' )
50
+ title = item .get (f'title_txt_{ lang } ' , lang )
51
+ folder = Folder (
52
+ id = item ['id' ],
53
+ title = title ,
54
+ lang = lang ,
55
+ tags = item .get ('tags' , []),
56
+ )
57
+ returned_list .append (folder )
58
+
59
+ return PaginatedResponse (
60
+ page_size = sq .page_size ,
61
+ page_number = page_number ,
62
+ num_pages = num_pages ,
63
+ items = returned_list
64
+ )
147
65
148
66
149
67
class IndexRW (Base ):
0 commit comments