-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathranking.py
109 lines (79 loc) · 3.85 KB
/
ranking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
""" Ranking Class """
# ---------------------------------------- IMPORT HERE ----------------------------------------
import math
# ---------------------------------------- Ranking ----------------------------------------
class Ranking:
    """Score and rank documents that matched a query using tf-idf weights.

    ``choice`` selects the formula applied by :meth:`compute_score`:

    * ``1`` -- cosine similarity between the query and document weights
    * ``2`` -- sum of the document-side tf-idf weights of the query terms
    * ``3`` -- dot product of the query and document tf-idf weights
    """

    def __init__(self, choice):
        self.choice = choice

    # ---------------------------------------- COMPUTE SCORE ----------------------------------------
    def compute_score(self, query, docid, index_trie, tfidf_query):
        """Compute the score of one document with respect to the query.

        Args:
            query: Sequence of query terms. Repeated terms are accumulated
                once per occurrence.
            docid: Identifier of the document being scored.
            index_trie: Mapping-like index supporting ``term in index_trie``
                and ``index_trie[term][docid]``; the document's tf-idf weight
                for the term is read from position ``1`` of that entry.
            tfidf_query: Mapping of every query term to its tf-idf weight
                with respect to the query.

        Returns:
            ``[docid, score]``.

        Raises:
            KeyError: If a query term is missing from ``tfidf_query``.
            ValueError: If ``self.choice`` is not 1, 2 or 3.
        """
        dot_product = 0
        doc_weight_sum = 0
        doc_norm_sq = 0
        query_norm_sq = 0

        for term in query:
            if term in index_trie and docid in index_trie[term]:
                doc_weight = index_trie[term][docid][1]
            else:
                # Term absent from this document: it contributes nothing.
                doc_weight = 0
            query_weight = tfidf_query[term]

            doc_weight_sum += doc_weight
            dot_product += doc_weight * query_weight
            doc_norm_sq += doc_weight * doc_weight
            query_norm_sq += query_weight * query_weight

        if self.choice == 1:
            denominator = math.sqrt(query_norm_sq) * math.sqrt(doc_norm_sq)
            # A zero-length vector (document holds none of the query terms,
            # or every query weight is 0) has no direction; score it 0
            # instead of dividing by zero.
            final_score = dot_product / denominator if denominator else 0.0
        elif self.choice == 2:
            final_score = doc_weight_sum
        elif self.choice == 3:
            final_score = dot_product
        else:
            raise ValueError(
                "unsupported ranking choice %r (expected 1, 2 or 3)" % (self.choice,)
            )

        return [docid, final_score]

    # ---------------------------------------- RANK RESULTS -----------------------------------------
    def rank_all(self, query, answers, indexes, idf_dict, isWC = 0):
        """Rank the matching documents of every index by descending score.

        Args:
            query: List of query terms.
            answers: Mapping ``{index_num: iterable of matching docids}``.
            indexes: Container where ``indexes[index_num][0]`` is the index
                structure handed to :meth:`compute_score`.
            idf_dict: Mapping ``{index_num: {term: idf}}``.
            isWC: Truthy when the query may contain wildcard terms (terms
                containing ``'*'``); such terms are excluded from scoring.

        Returns:
            A list of ``[docid, score, index_num]`` entries sorted by
            descending score. If a wildcard query has no literal terms left,
            every matching document is returned with score ``1`` in the
            iteration order of ``answers`` (no sorting is applied).
        """
        if isWC:
            literal_terms = [term for term in query if '*' not in term]
            if not literal_terms:
                # Only wildcard terms: nothing to weigh, so all matches tie.
                return [
                    [docid, 1, index_num]
                    for index_num in answers
                    for docid in answers[index_num]
                ]
            query = literal_terms

        # Log-normalised term frequency inside the query. It does not depend
        # on the index, so compute it once rather than once per index.
        query_tf = {
            term: math.log(1 + query.count(term), 10) for term in set(query)
        }

        final = []
        for index_num in answers:
            idf = idf_dict[index_num]
            # tf-idf of each query term w.r.t. the query, using this index's
            # idf; terms unknown to the index get weight 0.
            tfidf_query = {
                term: (idf[term] * tf if term in idf else 0)
                for term, tf in query_tf.items()
            }
            index_trie = indexes[index_num][0]
            for docid in answers[index_num]:
                score = self.compute_score(query, docid, index_trie, tfidf_query)
                score.append(index_num)
                final.append(score)  # [docid, score of document, index_num]

        # Stable sort: documents with equal scores keep their input order.
        final.sort(key=lambda entry: entry[1], reverse=True)
        return final