Add Dockerfile

dongfang91 · Oct 8, 2018 · bdee86a · bdee86a
1 parent 8035a00
commit bdee86a
Show file tree

Hide file tree

Showing 5 changed files with 38 additions and 6 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,30 @@
+# Base image: official Python 3.6 (the torch wheel below is a cp36 build).
+FROM python:3.6
+
+# Install dependencies before copying data and scripts so these layers are
+# reused from the build cache when only project files change.
+# --no-cache-dir keeps pip's download cache out of the image layers.
+# The torch wheel is the CPU-only build and is fetched over HTTPS.
+RUN pip3 install --no-cache-dir \
+        https://download.pytorch.org/whl/cpu/torch-0.4.1-cp36-cp36m-linux_x86_64.whl
+RUN pip3 install --no-cache-dir \
+        gensim \
+        nltk \
+        numpy \
+        scikit-learn \
+        torchvision
+
+# Create the directories for the data resources in a single layer.
+RUN mkdir -p /data/AskAPatient/ /data/TwADR-L/ /data/model/
+
+# Copy the required files into the image. COPY replaces ADD because these
+# are plain local files: no archive extraction or remote fetch is needed.
+COPY data/AskAPatient/* /data/AskAPatient/
+COPY data/TwADR-L/* /data/TwADR-L/
+COPY data/model/* /data/model/
+COPY gru.py rnn_baseline.py term_matching_baseline.py torch_preprocess.py /
+
+# Command run when the container starts. No WORKDIR is set in this file, so
+# the relative path is resolved against the image's working directory
+# (Docker's default is /, where the scripts are copied).
+CMD [ "python", "./rnn_baseline.py" ]
diff --git a/README.md b/README.md
@@ -1 +1,3 @@
 # concept_normalization
+Software packages:
+nltk, scikit-learn, gensim, numpy, pytorch
diff --git a/rnn_baseline.py b/rnn_baseline.py
@@ -4,14 +4,13 @@
 import torch.optim as optim
 import torch.nn as nn
 from torch.autograd import Variable
-import torch_prepreocess as function
+import torch_preprocess as function
 import math
-use_plot = True
-use_save = True
-if use_save:
-    import pickle
 
 
+np.random.seed(123)
+torch.manual_seed(123)
+
 
 ## parameter setting
 

diff --git a/term_matching_baseline.py b/term_matching_baseline.py
@@ -3,6 +3,7 @@
 from nltk.tokenize import word_tokenize
 from nltk.stem.snowball import SnowballStemmer
 import nltk
+from gensim.summarization import bm25
 nltk.download('punkt')
 stemmer = SnowballStemmer("english")
 
@@ -71,7 +72,6 @@ def get_scores(tf_idf_tests,tf_idf_label,label_text_index,label_code_dict,y_test
     return score
 
 def get_bm_25_score(query_list, bm25_list, label_text_new_index, label_y_dict, y_test_text):
-    from gensim.summarization import bm25
 
     bm25Model = bm25.BM25(bm25_list)
     average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
@@ -170,3 +170,4 @@ def term_matching_baseline(dataset):
     print("Average Test accuracy for %s: TF-IDF %s " %(dataset, scores_tf_idf_test/10.0))
     print("Average Test accuracy for %s: BM25 %s" %(dataset, scores_bm25_test/10.0))
 
+# term_matching_baseline("AskAPatient")
diff --git a/torch_prepreocess.py → torch_preprocess.py b/torch_prepreocess.py → torch_preprocess.py