From bdee86ada146406b6f3f4ad3ffaffa01a403abe3 Mon Sep 17 00:00:00 2001
From: dongfang <xudffaint@gmail.com>
Date: Sun, 7 Oct 2018 23:26:05 -0700
Subject: [PATCH] Add Dockerfile

---
 Dockerfile                                  | 30 +++++++++++++++++++++
 README.md                                   |  2 ++
 rnn_baseline.py                             |  9 +++----
 term_matching_baseline.py                   |  3 ++-
 torch_prepreocess.py => torch_preprocess.py |  0
 5 files changed, 38 insertions(+), 6 deletions(-)
 create mode 100644 Dockerfile
 rename torch_prepreocess.py => torch_preprocess.py (100%)

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..89fde08
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,30 @@
+# Base image: official Python 3.6 (the torch wheel below is built for cp36)
+FROM python:3.6
+
+# Create the directories that hold the data resources
+RUN mkdir /data/
+RUN mkdir /data/AskAPatient/
+RUN mkdir /data/TwADR-L/
+RUN mkdir /data/model/
+
+
+# Copy the required data files and scripts into the container
+COPY data/AskAPatient/* /data/AskAPatient/
+COPY data/TwADR-L/* /data/TwADR-L/
+COPY data/model/* /data/model/
+COPY gru.py /
+COPY rnn_baseline.py /
+COPY term_matching_baseline.py /
+COPY torch_preprocess.py /
+
+# Install the Python dependencies with pip (torch: CPU 0.4.1 wheel, over HTTPS)
+RUN pip3 install https://download.pytorch.org/whl/cpu/torch-0.4.1-cp36-cp36m-linux_x86_64.whl
+RUN pip3 install torchvision
+RUN pip3 install nltk
+RUN pip3 install scikit-learn
+RUN pip3 install numpy
+RUN pip3 install gensim
+
+# Default command run when the container starts: the RNN baseline script
+# (no WORKDIR is set; the scripts above are copied to /)
+CMD [ "python", "./rnn_baseline.py" ]
diff --git a/README.md b/README.md
index 176d31d..8e77da2 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,3 @@
 # concept_normalization
+Software packages:
+nltk, scikit-learn, gensim, numpy, pytorch
\ No newline at end of file
diff --git a/rnn_baseline.py b/rnn_baseline.py
index c4e9463..c438743 100644
--- a/rnn_baseline.py
+++ b/rnn_baseline.py
@@ -4,14 +4,13 @@
 import torch.optim as optim
 import torch.nn as nn
 from torch.autograd import Variable
-import torch_prepreocess as function
+import torch_preprocess as function
 import math
-use_plot = True
-use_save = True
-if use_save:
-    import pickle
 
 
+np.random.seed(123)
+torch.manual_seed(123)
+
 
 ## parameter setting
 
diff --git a/term_matching_baseline.py b/term_matching_baseline.py
index 5a353ef..84b49ad 100644
--- a/term_matching_baseline.py
+++ b/term_matching_baseline.py
@@ -3,6 +3,7 @@
 from nltk.tokenize import word_tokenize
 from nltk.stem.snowball import SnowballStemmer
 import nltk
+from gensim.summarization import bm25
 nltk.download('punkt')
 stemmer = SnowballStemmer("english")
 
@@ -71,7 +72,6 @@ def get_scores(tf_idf_tests,tf_idf_label,label_text_index,label_code_dict,y_test
     return score
 
 def get_bm_25_score(query_list, bm25_list, label_text_new_index, label_y_dict, y_test_text):
-    from gensim.summarization import bm25
 
     bm25Model = bm25.BM25(bm25_list)
     average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys())
@@ -170,3 +170,4 @@ def term_matching_baseline(dataset):
     print("Average Test accuracy for %s: TF-IDF %s " %(dataset, scores_tf_idf_test/10.0))
     print("Average Test accuracy for %s: BM25 %s" %(dataset, scores_bm25_test/10.0))
 
+# term_matching_baseline("AskAPatient")
\ No newline at end of file
diff --git a/torch_prepreocess.py b/torch_preprocess.py
similarity index 100%
rename from torch_prepreocess.py
rename to torch_preprocess.py