Skip to content

Commit

Permalink
clean
Browse files: browse the repository at this point in the history
  • Loading branch information
imdiptanu committed May 5, 2021
1 parent 1fb37cd commit 242ad44
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 9 deletions.
4 changes: 0 additions & 4 deletions doc2topic/corpora.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -69,10 +69,6 @@ def prepare(self, replace=False, with_generator=False):
self.input_tokens = np.array(self.input_tokens, dtype="int32")
self.outputs = np.array(self.outputs)

print(self.input_docs)
print(self.input_tokens)
print(self.outputs)

#self.idx2token = dict([(i,t) for t,i in self.token2idx.items()])
if replace:
del self.docs
Expand Down
5 changes: 0 additions & 5 deletions doc2topic/models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -124,11 +124,6 @@ def get_wordvecs(self, min_zero=True):
def get_topic_words(self, top_n=10, stopwords=set()):
self.get_wordvecs()
topic_words = {}
print("-----------------")
print(np.shape(self.wordvecs))
self.get_docvecs()
print(np.shape(self.docvecs))
print("-----------------")
for topic in range(self.wordvecs.shape[1]):
topic_words[topic] = heapq.nlargest(top_n+len(stopwords), enumerate(L1normalize(self.wordvecs[:,topic])), key=lambda x:x[1])
topic_words[topic] = [(self.corpus.idx2token[idx], score) for idx, score in topic_words[topic] if self.corpus.idx2token[idx] not in stopwords]
Expand Down

0 comments on commit 242ad44

Please sign in to comment.