-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paths_main.py
35 lines (27 loc) · 1.12 KB
/
s_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import random
import warnings
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import pairwise_distances
# Suppress every warning category for the whole process from this point on.
warnings.filterwarnings("ignore")
# Load the product table once at import time. The path is relative, so it is
# resolved against the current working directory of the process.
# The functions below read this frame by position (columns 1-6) and by the
# 'title' column.
# NOTE(review): unpickling executes arbitrary code embedded in the file; only
# load this from a trusted source.
data = pd.read_pickle(r'16k_apperal_data_preprocessed')
def generate_random_products(count=6):
    """Return display records for a random sample of distinct products.

    Args:
        count: Maximum number of products to sample. Defaults to 6, the
            number the original implementation always returned. Values
            larger than the table are clamped to the table size, and
            negative values are treated as 0.

    Returns:
        A list with one entry per sampled row of ``data``. Each entry is
        ``[col3, "col1 col2 col4", str(col5), str(col6), row_position]``,
        where ``colN`` is the value at positional column N of the row and
        ``row_position`` is the integer position usable with ``data.iloc``.
        (Presumably col3 is an image URL, given the list name -- confirm
        against the dataset schema.)
    """
    n = data.shape[0]
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the request to what is available.
    sample_size = min(max(count, 0), n)
    p_nums = random.sample(range(n), sample_size)
    urls = []
    for i in p_nums:
        # iloc[[i]] keeps a one-row frame; .values[0] is that row as an array.
        res = data.iloc[[i]].values[0]
        title = " ".join(str(res[col]) for col in (1, 2, 4))
        urls.append([res[3], title, str(res[5]), str(res[6]), i])
    return urls
# Fit a TF-IDF model on the product titles once at import time; the resulting
# matrix has one row per row of `data` and is reused by every recommendation
# lookup.
# min_df=1 keeps every term that occurs in at least one title, which is the
# same vocabulary the previous integer min_df=0 produced. An integer 0 is
# rejected by scikit-learn releases that validate parameters (an int must be
# >= 1, a float must lie in [0.0, 1.0]), so the old value fails at import time
# on those releases.
tfidf_title_vectorizer = TfidfVectorizer(min_df=1)
tfidf_title_features = tfidf_title_vectorizer.fit_transform(data['title'])
def get_recomendattions(doc_id, num_results=7):
    """Return the products whose titles are closest to a given product.

    Closeness is the distance computed by ``pairwise_distances`` (with its
    default metric) between TF-IDF title vectors.

    Args:
        doc_id: Non-negative integer row position of the query product in
            ``data`` / ``tfidf_title_features``.
        num_results: Size of the neighbourhood *including* the query product
            itself, so at most ``num_results - 1`` recommendations are
            returned. Defaults to 7 (six recommendations), matching the
            original hard-coded value.

    Returns:
        A list of ``[col3, "col1 col2 col4", str(col5), str(col6), row_position]``
        entries ordered from nearest to farthest, where ``colN`` is the value
        at positional column N of the row. ``row_position`` is a plain
        ``int``, the same type ``generate_random_products`` returns.
    """
    # Distance from every title vector to the query title vector.
    pairwise_dist = pairwise_distances(tfidf_title_features, tfidf_title_features[doc_id])
    # A stable sort keeps rows at equal distance in positional order, so the
    # ranking is reproducible.
    ranked = np.argsort(pairwise_dist.flatten(), kind="stable")
    # Remove the query row explicitly instead of assuming it is rank 0: when
    # another product has an identical title (distance 0) the query is not
    # guaranteed to sort first, and skipping by position would recommend the
    # query itself while dropping a real neighbour.
    neighbours = ranked[ranked != doc_id][:max(num_results - 1, 0)]
    urls = []
    for idx in neighbours:
        # iloc[[idx]] keeps a one-row frame; .values[0] is that row as an array.
        res = data.iloc[[idx]].values[0]
        title = " ".join(str(res[col]) for col in (1, 2, 4))
        # int(...) converts the NumPy integer to a built-in int.
        urls.append([res[3], title, str(res[5]), str(res[6]), int(idx)])
    return urls