Updated README.md

ashfarhangi · May 18, 2022 · 9d2a4fe · 9d2a4fe
1 parent e37ee4b
commit 9d2a4fe
Show file tree

Hide file tree

Showing 28 changed files with 635 additions and 230 deletions.
diff --git a/.env b/.env
diff --git a/.vscode/configurationCache.log b/.vscode/configurationCache.log
@@ -0,0 +1 @@
+{"buildTargets":[],"launchTargets":[],"customConfigurationProvider":{"workspaceBrowse":{"browsePath":[],"compilerArgs":[]},"fileIndex":[]}}
diff --git a/.vscode/dryrun.log b/.vscode/dryrun.log
@@ -0,0 +1,4 @@
+make.exe --dry-run --always-make --keep-going --print-directory
+'make.exe' is not recognized as an internal or external command,
+operable program or batch file.
+
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "makefile.extensionOutputFolder": "./.vscode"
+}
diff --git a/.vscode/targets.log b/.vscode/targets.log
@@ -0,0 +1,4 @@
+make.exe all --print-data-base --no-builtin-variables --no-builtin-rules --question
+'make.exe' is not recognized as an internal or external command,
+operable program or batch file.
+
diff --git a/Makefile b/Makefile
diff --git a/README.md b/README.md
@@ -15,43 +15,36 @@ Arvix:
 ![](https://github.com/0415070/Protoformer/blob/main/visualization/arvix100.png)
 ### Installation
 
-1. You can download a copy of all the files in this repository by cloning the following repo:
+• You can download a copy of all the files in this repository by cloning the repo:
 
-   ```
+   ```bash
    git clone https://github.com/ashfarhangi/Protoformer.git
    ```
 
-2. Install requirement packages
+• Install the required packages
 
-   ```
+   ```bash
    pip install -r requirements.txt
    ```
 
-3. Enter your Twitter API keys in:
+• Run model.py 
+
+(optional)
+• Enter your Twitter API keys in:
 To use the data properly, you need your own official Twitter API key. Please replace API_KEY with your own, as shown below:
 
 
    ```
    const API_KEY = 'ENTER YOUR API';
    ```
 
-4. Run model.py after the dataset has been gathered  
-### Prerequisites
-You'll need a working Python environment to run the code.
-The recommended way to set up your environment is through the
-[Anaconda Python distribution](https://www.anaconda.com/download/) which
-provides the `conda` package manager.
-Anaconda can be installed in your user directory and does not interfere with
-the system Python installation.
-The required dependencies are specified in the file `environment.yml`. We used `conda` virtual environments to manage the project dependencies in
-isolation. Thus, you can install our dependencies without causing conflicts with your
-setup (even with different Python versions).
-Run the following command in the repository folder (where `environment.yml`
-is located) to create a separate environment and install all required
-dependencies in it:
-
-    conda env create
 
+### Prerequisites
+```
+Python
+git
+pip
+```
 
 
 ## License

diff --git a/__init__.py b/__init__.py
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/data/__init__.py b/data/__init__.py
diff --git a/data/__pycache__/__init__.cpython-38.pyc b/data/__pycache__/__init__.cpython-38.pyc
diff --git a/data/__pycache__/dataloader.cpython-38.pyc b/data/__pycache__/dataloader.cpython-38.pyc
diff --git a/data/dataloader.py b/data/dataloader.py
@@ -0,0 +1,59 @@
+from sklearn.model_selection import train_test_split  # module purpose: load the IMDB CSV, plot label counts, encode labels, build train/test frames
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np  # duplicate of the import above
+import pandas as pd  # duplicate of the import above
+import seaborn as sns  # duplicate of the import above
+import matplotlib.pyplot as plt  # duplicate of the import above
+import torch
+from torch import nn, optim
+from torch.utils.data import Dataset, DataLoader
+import torch.nn.functional as F
+import warnings
+warnings.filterwarnings('ignore')  # no category given, so every warning is suppressed from here on
+import sklearn
+from sklearn.model_selection import train_test_split  # duplicate of the first import
+from sklearn.metrics import confusion_matrix,classification_report
+from collections import defaultdict
+from textwrap import wrap
+from joblib import load, dump
+import pickle
+from tqdm import tqdm
+import transformers
+import datetime
+import matplotlib.pylab as pylab
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.decomposition import PCA
+from scipy.stats import energy_distance
+from fastdist import fastdist
+df_imdb = pd.read_csv('.//data//IMDB.csv')  # relative path: resolved against the current working directory, at import time
+# df_imdb = df_imdb.sample(2000)
+df_imdb.reset_index(drop=True,inplace=True)
+sns.countplot(df_imdb.sentiment)  # reads a `sentiment` column, so the CSV must provide one
+plt.ylabel('Samples')
+plt.xlabel('IMDB Movie Sentiments')
+plt.show()  # runs on import; NOTE(review): presumably blocks until the window is closed on interactive backends — confirm
+# sns.countplot(df_embeddings.predicted_raw_difference)
+df = df_imdb  # alias, not a copy
+df_profile = df_imdb  # alias, not a copy: the in-place column changes below are visible through df and df_imdb too
+df_profile.columns = ['number','doc', 'labels_original']  # assumes the CSV has exactly three columns — TODO confirm against the data file
+df_profile['labels'] = df_profile['labels_original']
+
+le = LabelEncoder()  # NOTE(review): LabelEncoder is not imported above (it lives in sklearn.preprocessing) — NameError as written
+df_profile['labels']= le.fit_transform(df_profile['labels'])  # overwrites the copied labels with integer codes
+
+# X = df_profile.review
+X = df_profile.doc
+y = df_profile.labels
+# z = df_profile.user_name
+X_train,X_test,y_train,y_test= train_test_split(X,y,stratify=y,test_size=0.2, random_state=47)  # 80/20 split, stratified on the labels, fixed seed
+print('number of training samples:', len(X_train))
+print('number of test samples:', len(X_test))
+train_df = pd.DataFrame({'doc':X_train,
+                         'labels':y_train})
+test_df = pd.DataFrame({'doc':X_test,
+                         'labels':y_test})
+train_df.reset_index(drop=True,inplace=True)  # drop the shuffled original row indices
+test_df.reset_index(drop=True,inplace=True)
diff --git a/main.py b/main.py
@@ -0,0 +1,20 @@
+# =============================================================================
+# Main file: script entry point that builds, trains and evaluates a model via run()
+# =============================================================================
+from src import utils
+from data import dataloader  # importing this module runs its top-level code (CSV read, plot) as a side effect
+from src import metric,model,plot,trainer
+
+
+def run():
+    """Builds model, loads data, trains and evaluates"""
+    model = Protoformer('twitter-uni')  # NOTE(review): Protoformer is not imported or defined in this file (NameError as written); this local `model` also hides the imported src.model module inside run()
+    # DistilBERT(twitter-uni) BERT(imdb) RoBERTa(arxiv-10)
+    model.load_data('twitter-uni')
+    # twitter-uni, imdb, arxiv-10
+    model.build()
+    model.train()
+    model.evaluate()
+
+if __name__ == '__main__':  # only run the pipeline when executed as a script, not when imported
+    run()    
diff --git a/requirements.txt b/requirements.txt
@@ -1,16 +1,13 @@
-# local package
--e .
-
-# external requirements
-click
-Sphinx
-coverage
-awscli
-flake8
-python-dotenv>=0.5.1
-shap==0.35.0
-shap
+numpy
+pandas
+matplotlib
 transformers
-sentence-transformers
-torch==1.8.1
-
+torch
+fastdist
+scikit-learn
+seaborn
+pickle
+joblib
+tqdm
+pkbar
+fastdist
diff --git a/setup.py b/setup.py
diff --git a/src/__ini__.py b/src/__ini__.py
diff --git a/src/__pycache__/metric.cpython-38.pyc b/src/__pycache__/metric.cpython-38.pyc
diff --git a/src/__pycache__/model.cpython-38.pyc b/src/__pycache__/model.cpython-38.pyc
diff --git a/src/__pycache__/plot.cpython-38.pyc b/src/__pycache__/plot.cpython-38.pyc
diff --git a/src/__pycache__/trainer.cpython-38.pyc b/src/__pycache__/trainer.cpython-38.pyc
diff --git a/src/__pycache__/utils.cpython-38.pyc b/src/__pycache__/utils.cpython-38.pyc
diff --git a/src/metric.py b/src/metric.py
@@ -0,0 +1,7 @@
+# =============================================================================
+# Evaluation metrics
+# =============================================================================
+from sklearn.metrics import f1_score,classification_report
+def acc_cal(big_idx, targets):  # returns how many positions of big_idx equal targets (a raw count, not a ratio)
+    n_correct = (big_idx==targets).sum().item()  # elementwise ==, summed, then .item() unwraps to a scalar; presumably torch tensors — confirm with caller
+    return n_correct
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"buildTargets":[],"launchTargets":[],"customConfigurationProvider":{"workspaceBrowse":{"browsePath":[],"compilerArgs":[]},"fileIndex":[]}}