From bdee86ada146406b6f3f4ad3ffaffa01a403abe3 Mon Sep 17 00:00:00 2001 From: dongfang Date: Sun, 7 Oct 2018 23:26:05 -0700 Subject: [PATCH] Add Dockerfile --- Dockerfile | 30 +++++++++++++++++++++ README.md | 2 ++ rnn_baseline.py | 9 +++---- term_matching_baseline.py | 3 ++- torch_prepreocess.py => torch_preprocess.py | 0 5 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 Dockerfile rename torch_prepreocess.py => torch_preprocess.py (100%) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..89fde08 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +# use a container for python 3 +FROM python:3.6 + +# make a directory for data resources +RUN mkdir /data/ +RUN mkdir /data/AskAPatient/ +RUN mkdir /data/TwADR-L/ +RUN mkdir /data/model/ + + +# this will copy the required files to the Docker container +COPY data/AskAPatient/* /data/AskAPatient/ +COPY data/TwADR-L/* /data/TwADR-L/ +COPY data/model/* /data/model/ +ADD gru.py / +ADD rnn_baseline.py / +ADD term_matching_baseline.py / +ADD torch_preprocess.py / + +# to install dependencies with pip, see the following example +Run pip3 install http://download.pytorch.org/whl/cpu/torch-0.4.1-cp36-cp36m-linux_x86_64.whl +Run pip3 install torchvision +RUN pip3 install nltk +RUN pip3 install scikit-learn +RUN pip3 install numpy +RUN pip3 install gensim + +# This is the command that will be run when you start the container +# Here it's a python script, but it could be a shell script, etc. 
+CMD [ "python", "./rnn_baseline.py" ] diff --git a/README.md b/README.md index 176d31d..8e77da2 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ # concept_normalization +Software Packages: +nltk, scikit-learn, gensim, numpy, pytorch \ No newline at end of file diff --git a/rnn_baseline.py b/rnn_baseline.py index c4e9463..c438743 100644 --- a/rnn_baseline.py +++ b/rnn_baseline.py @@ -4,14 +4,13 @@ import torch.optim as optim import torch.nn as nn from torch.autograd import Variable -import torch_prepreocess as function +import torch_preprocess as function import math -use_plot = True -use_save = True -if use_save: - import pickle +np.random.seed(123) +torch.manual_seed(123) + ## parameter setting diff --git a/term_matching_baseline.py b/term_matching_baseline.py index 5a353ef..84b49ad 100644 --- a/term_matching_baseline.py +++ b/term_matching_baseline.py @@ -3,6 +3,7 @@ from nltk.tokenize import word_tokenize from nltk.stem.snowball import SnowballStemmer import nltk +from gensim.summarization import bm25 nltk.download('punkt') stemmer = SnowballStemmer("english") @@ -71,7 +72,6 @@ def get_scores(tf_idf_tests,tf_idf_label,label_text_index,label_code_dict,y_test return score def get_bm_25_score(query_list, bm25_list, label_text_new_index, label_y_dict, y_test_text): - from gensim.summarization import bm25 bm25Model = bm25.BM25(bm25_list) average_idf = sum(map(lambda k: float(bm25Model.idf[k]), bm25Model.idf.keys())) / len(bm25Model.idf.keys()) @@ -170,3 +170,4 @@ def term_matching_baseline(dataset): print("Average Test accuracy for %s: TF-IDF %s " %(dataset, scores_tf_idf_test/10.0)) print("Average Test accuracy for %s: BM25 %s" %(dataset, scores_bm25_test/10.0)) +# term_matching_baseline("AskAPatient") \ No newline at end of file diff --git a/torch_prepreocess.py b/torch_preprocess.py similarity index 100% rename from torch_prepreocess.py rename to torch_preprocess.py