-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearcher.py
71 lines (56 loc) · 1.98 KB
/
searcher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import copy

from elasticsearch import Elasticsearch
class Highlight:
    """Highlighted snippets collected for a single document field."""

    def __init__(self, field: str):
        """Start with no snippets for the field named *field*."""
        self.snippets = list()
        self.field: str = field

    def add_snippet(self, text: str):
        """Append one snippet to the end of ``snippets``."""
        collected = self.snippets
        collected.append(text)
class SearchResult:
    """A single search hit: its title, score, text and field highlights.

    Attributes:
        score: Relevance score; starts at ``0.0`` until a caller sets it.
        title: Title passed to the constructor.
        text: Body text; starts as the empty string.
        highlights: ``Highlight`` objects added via :meth:`add_highlight`.
    """

    def __init__(self, title: str):
        """Create a result for *title* with default score, text and highlights."""
        self.score: float = 0.0
        self.title: str = title
        self.text: str = ""
        # Each instance gets its own list, so results never share highlights.
        self.highlights: "list[Highlight]" = []

    def add_highlight(self, highlight: "Highlight") -> None:
        """Append *highlight* to this result's ``highlights`` list."""
        self.highlights.append(highlight)
class Searcher:
    """Run title searches against an Elasticsearch index and wrap the hits.

    ``client`` is ``None`` until :meth:`connect` is called, so call
    :meth:`connect` before :meth:`search`.
    """

    def __init__(self):
        """Set the index name, document type and the request-body template."""
        self.client: Elasticsearch = None
        self.index_name: str = "wikipedia_pages"
        self.doc_type: str = "Wikipage"
        # Request-body template. ``search`` fills in the query text on a
        # private copy, so the placeholder text below is never sent unless a
        # caller searches for it explicitly.
        self.query_template = {
            "query": {
                "simple_query_string": {
                    "fields": [
                        "title"
                    ],
                    "query": "Web crawler",
                    "default_operator": "or"
                }
            },
            "highlight": {
                "order": "score",
                "fields": {
                    "title": {
                        "type": "unified"
                    }
                }
            }
        }

    def connect(self, host: str = 'localhost', port: int = 9200) -> bool:
        """Create the Elasticsearch client and ping the cluster.

        Args:
            host: Host name of the Elasticsearch node.
            port: Port of the Elasticsearch node.

        Returns:
            The result of ``client.ping()``, so callers can tell whether the
            cluster answered instead of having the result thrown away.
        """
        self.client = Elasticsearch([{'host': host, 'port': port}])
        return self.client.ping()

    def search(self, query_string: str, size: int = 10):
        """Search titles for *query_string* and return the matching results.

        Args:
            query_string: Text placed in the ``simple_query_string`` query.
            size: Maximum number of hits requested from Elasticsearch.

        Returns:
            A list of ``SearchResult`` objects, one per hit, in the order the
            hits appear in the response.
        """
        # Deep copy: a shallow ``dict.copy()`` shares the nested dicts, so
        # writing the query text would overwrite the template itself.
        query: dict = copy.deepcopy(self.query_template)
        query['query']['simple_query_string']['query'] = query_string
        # Keyword arguments keep the call independent of the client's
        # positional parameter order.
        raw_results = self.client.search(
            index=self.index_name,
            doc_type=self.doc_type,
            body=query,
            size=size,
        )
        return [self._to_result(hit) for hit in raw_results["hits"]["hits"]]

    @staticmethod
    def _to_result(hit: dict) -> "SearchResult":
        """Convert one raw hit dict into a ``SearchResult`` with highlights."""
        result = SearchResult(hit['_source']['title'])
        result.score = hit["_score"]
        # A hit may carry no ``highlight`` section; treat that as "no
        # highlights" rather than failing the whole search with a KeyError.
        for field, snippets in hit.get('highlight', {}).items():
            highlight = Highlight(field)
            highlight.snippets = snippets
            result.add_highlight(highlight)
        return result