Skip to content

Commit

Permalink
Add Dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
dongfang91 committed Oct 8, 2018
1 parent 8035a00 commit bdee86a
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 6 deletions.
30 changes: 30 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Base image: official CPython 3.6.
# The torch wheel installed below is a cp36 build, so the interpreter must stay on 3.6.
FROM python:3.6

# Install Python dependencies first, in a single layer, so the layer is reused from the
# build cache when only the data or the scripts change.
#   * torch is fetched over HTTPS (the original used plain HTTP).
#   * torchvision is pinned to the release contemporary with torch 0.4.1; an unpinned
#     install can make pip pull a newer torch and silently replace the 0.4.1 wheel.
#   * gensim is held below 4.0 because term_matching_baseline.py imports
#     gensim.summarization.bm25, which was removed in gensim 4.0.
#   * NOTE(review): nltk, numpy and scikit-learn are still unpinned - pin them once the
#     known-good versions are confirmed.
RUN pip3 install --no-cache-dir \
        https://download.pytorch.org/whl/cpu/torch-0.4.1-cp36-cp36m-linux_x86_64.whl \
        torchvision==0.2.1 \
        "gensim<4" \
        nltk \
        numpy \
        scikit-learn

# Create the directories for data resources in one layer.
RUN mkdir -p /data/AskAPatient /data/TwADR-L /data/model

# Copy the required data files into the container.
COPY data/AskAPatient/* /data/AskAPatient/
COPY data/TwADR-L/* /data/TwADR-L/
COPY data/model/* /data/model/

# Copy the scripts into the filesystem root. COPY is used instead of ADD because these
# are plain local files (no archive extraction or remote fetch is needed).
COPY gru.py rnn_baseline.py term_matching_baseline.py torch_preprocess.py /

# Command run when the container starts.
# No WORKDIR is set, so the working directory is / and the relative path below resolves
# to /rnn_baseline.py.
CMD ["python", "./rnn_baseline.py"]
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# concept_normalization
Software Package:
nltk, scikit-learn, gensim, numpy, pytorch
9 changes: 4 additions & 5 deletions rnn_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import torch.optim as optim
import torch.nn as nn
from torch.autograd import Variable
import torch_prepreocess as function
import torch_preprocess as function
import math
use_plot = True
use_save = True
if use_save:
import pickle


np.random.seed(123)
torch.manual_seed(123)


## parameter setting

Expand Down
3 changes: 2 additions & 1 deletion term_matching_baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from nltk.tokenize import word_tokenize
from nltk.stem.snowball import SnowballStemmer
import nltk
from gensim.summarization import bm25
nltk.download('punkt')
stemmer = SnowballStemmer("english")

Expand Down Expand Up @@ -71,7 +72,6 @@ def get_scores(tf_idf_tests,tf_idf_label,label_text_index,label_code_dict,y_test
return score

def get_bm_25_score(query_list, bm25_list, label_text_new_index, label_y_dict, y_test_text):
from gensim.summarization import bm25

bm25Model = bm25.BM25(bm25_list)
average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
Expand Down Expand Up @@ -170,3 +170,4 @@ def term_matching_baseline(dataset):
print("Average Test accuracy for %s: TF-IDF %s " %(dataset, scores_tf_idf_test/10.0))
print("Average Test accuracy for %s: BM25 %s" %(dataset, scores_bm25_test/10.0))

# term_matching_baseline("AskAPatient")
File renamed without changes.

0 comments on commit bdee86a

Please sign in to comment.