Skip to content

Commit eeae2bf

Browse files
committed
make model deterministic and fix server errors
1 parent d5b4e2a commit eeae2bf

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

__pycache__/trainModel.cpython-37.pyc

-8 Bytes
Binary file not shown.

trainModel.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
from utils import *
22
from gensim import corpora, models
3+
import numpy as np
34

45
def train_model(data):
6+
# Stabilizes the model so the results are deterministic
7+
np.random.seed(42)
58
# Load our data into a dataframe and preprocess the text
69
df = data_to_pd(data)
710
corpus = df['text'].map(preprocess)
811
# Creates a dictionary of words from the corpus
912
dictionary = gensim.corpora.Dictionary(corpus)
10-
dictionary.filter_extremes(no_below=15, no_above=0.5, keep_n=100000)
1113
bow_corpus = [dictionary.doc2bow(doc) for doc in corpus]
1214
# Train our model
1315
lda_model = gensim.models.LdaMulticore(bow_corpus, num_topics=3, id2word=dictionary, passes=2, workers=2)
@@ -16,6 +18,7 @@ def train_model(data):
1618
def classify_search(search, dictionary, lda_model):
    """Return the top-ranked topic entry's first element for ``search``.

    The search text is passed through ``preprocess``, converted to a
    bag-of-words vector with ``dictionary.doc2bow``, and looked up in
    ``lda_model``. The entries returned by the model are ordered by their
    second element, largest first, printed to stdout, and the first element
    of the leading entry is returned.

    NOTE(review): the entries are presumably ``(topic_id, probability)``
    pairs as produced by a gensim LDA model -- confirm against the caller.

    Raises:
        IndexError: if the model returns no entries for the search text.
    """
    tokens = preprocess(search)
    bow_vector = dictionary.doc2bow(tokens)
    topic_entries = lda_model[bow_vector]
    # Negating the sort key puts the largest second element first while
    # keeping tied entries in the order the model returned them.
    ranked = sorted(topic_entries, key=lambda entry: -entry[1])
    print(ranked)
    best_entry = ranked[0]
    return best_entry[0]
2023

2124
def partition(data, search):
@@ -28,4 +31,4 @@ def partition(data, search):
2831
scores.append(True)
2932
else:
3033
scores.append(False)
31-
return scores
34+
return scores

0 commit comments

Comments
 (0)