
Commit

Yada Pruksachatkun committed May 22, 2019
2 parents 3cc2fc4 + 9f54df2 commit 0e622dd
Showing 8 changed files with 46 additions and 22 deletions.
6 changes: 6 additions & 0 deletions .circleci/config.yml
@@ -26,6 +26,12 @@ jobs:
source activate jiant
python -m nltk.downloader perluniprops nonbreaking_prefixes punkt
nose2 -v
+# Step 4: style check
+- run:
+name: check style
+command: |
+pip install black
+black . --check --exclude "/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|src/modules/cove)/"
workflows:
version: 2
test:
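
For contributors, the same check can be run locally before pushing; a minimal sketch mirroring the CI step above (run from the repository root):

```
# Install the formatter and verify style without rewriting files,
# using the same exclude pattern as the CI step.
pip install black
black . --check --exclude "/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|src/modules/cove)/"
```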
26 changes: 21 additions & 5 deletions README.md
@@ -1,6 +1,7 @@
# jiant

-[![CircleCI](https://circleci.com/gh/nyu-mll/jiant/tree/master.svg?style=svg)](https://circleci.com/gh/nyu-mll/jiant/tree/master)
+[![CircleCI](https://circleci.com/gh/nyu-mll/jiant/tree/master.svg?style=svg)](https://circleci.com/gh/nyu-mll/jiant/tree/master) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/python/black)


`jiant` is a work-in-progress software toolkit for natural language processing research, designed to facilitate work on multitask learning and transfer learning for sentence understanding tasks.

@@ -61,16 +62,31 @@ To exactly reproduce experiments from [the ELMo's Friends paper](https://arxiv.o
For the [edge probing paper](https://openreview.net/forum?id=SJzSgnRcKX), see the [probing/](probing/) directory.


-## License
-
-This package is released under the [MIT License](LICENSE.md). The material in the allennlp_mods directory is based on [AllenNLP](https://github.com/allenai/allennlp), which was originally released under the Apache 2.0 license.

## Getting Help

Post an issue here on GitHub if you have any problems, and create a pull request if you make any improvements (substantial or cosmetic) to the code that you're willing to share.


+## Contributing
+
+We use the `black` coding style with a line limit of 100. After installing the requirements, simply running
+`pre-commit install` should ensure you comply with this in all your future commits. If you're adding features
+or fixing a bug, please also add the tests.
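
For reference, a black hook with a 100-character line limit can be configured roughly as in the sketch below (a hypothetical minimal `.pre-commit-config.yaml`; the repository ships its own configuration, which may differ):

```
# Hypothetical minimal pre-commit configuration enforcing the black style.
repos:
  - repo: https://github.com/python/black
    rev: 19.3b0        # assumed pin; use the version the project requires
    hooks:
      - id: black
        args: [--line-length=100]
```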


+## License
+
+This package is released under the [MIT License](LICENSE.md). The material in the allennlp_mods directory is based on [AllenNLP](https://github.com/allenai/allennlp), which was originally released under the Apache 2.0 license.


## Acknowledgments

- Part of the development of `jiant` took place at the 2018 Frederick Jelinek Memorial Summer Workshop on Speech and Language Technologies, and was supported by Johns Hopkins University with unrestricted gifts from Amazon, Facebook, Google, Microsoft and Mitsubishi Electric Research Laboratories.
- This work was made possible in part by a donation to NYU from Eric and Wendy Schmidt made
by recommendation of the Schmidt Futures program.
- We gratefully acknowledge the support of NVIDIA Corporation with the donation of a Titan V GPU used at NYU in this work.
- Developer Alex Wang is supported by the National Science Foundation Graduate Research Fellowship Program under Grant
No. DGE 1342536. Any opinions, findings, and conclusions or recommendations expressed in this
material are those of the author(s) and do not necessarily reflect the views of the National Science
Foundation.
- Developer Yada Pruksachatkun is supported by the Moore-Sloan Data Science Environment as part of the NYU Data Science Services initiative.
4 changes: 2 additions & 2 deletions scripts/superglue-baselines.sh
@@ -34,8 +34,8 @@ function wsc() {
python main.py --config config/superglue-bert.conf --overrides "random_seed = ${seed}, cuda = ${gpuid}, run_name = wsc, pretrain_tasks = \"winograd-coreference\", target_tasks = \"winograd-coreference\", do_pretrain = 1, do_target_task_training = 0, do_full_eval = 1, batch_size = 4, val_interval = 139, optimizer = adam"
}

if [ $1 == "cb" ]; then
cb
if [ $1 == "commit" ]; then
commit
elif [ $1 == "copa" ]; then
copa
elif [ $1 == "multirc" ]; then
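
Given the rename above, the CommitmentBank baseline is now selected by passing `commit` as the first argument; a hedged usage sketch (any further arguments, e.g. GPU id or seed, follow the script's own header, which is not shown here):

```
# "$1" picks the task function to run; "commit" is CommitmentBank (CB).
bash scripts/superglue-baselines.sh commit
```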
4 changes: 2 additions & 2 deletions src/evaluate.py
@@ -219,7 +219,7 @@ def write_preds(
"rte-superglue": "RTE",
"wic": "WiC",
"superglue-diagnostic": "AX",
"winograd-coreference": "WSC"
"winograd-coreference": "WSC",
}


@@ -353,7 +353,7 @@ def _write_multirc_preds(
preds_file = _get_pred_filename(task.name, pred_dir, split_name, strict_glue_format)
with open(preds_file, "w", encoding="utf-8") as preds_fh:
if strict_glue_format:
-par_qst_ans_d = defaultdict(lambda : defaultdict(list))
+par_qst_ans_d = defaultdict(lambda: defaultdict(list))
for row_idx, row in preds_df.iterrows():
ans_d = {"idx": int(row["ans_idx"]), "label": int(row["preds"])}
par_qst_ans_d[int(row["par_idx"])][int(row["qst_idx"])].append(ans_d)
2 changes: 1 addition & 1 deletion src/tasks/qa.py
@@ -102,7 +102,7 @@ def _make_instance(para, question, answer, label, par_idx, qst_idx, ans_idx):
d["par_idx"] = MetadataField(par_idx)
d["qst_idx"] = MetadataField(qst_idx)
d["ans_idx"] = MetadataField(ans_idx)
d["idx"] = MetadataField(ans_idx) # required by evaluate()
d["idx"] = MetadataField(ans_idx) # required by evaluate()
if is_using_bert:
inp = para + question[1:-1] + answer[1:]
d["para_quest_ans"] = sentence_to_text_field(inp, indexers)
4 changes: 2 additions & 2 deletions src/tasks/tasks.py
@@ -353,9 +353,9 @@ def get_metrics(self, reset=False):
return {"1-mse": 1 - mse, "mse": mse, "spearmanr": spearmanr}

def process_split(self, split, indexers) -> Iterable[Type[Instance]]:
-''' Process split text into a list of AllenNLP Instances. '''
+""" Process split text into a list of AllenNLP Instances. """
return process_single_pair_task_split(split, indexers, is_pair=True, classification=False)

def update_metrics(self, logits, labels, tagmask=None):
self.scorer1(mean_squared_error(logits, labels)) # update average MSE
self.scorer2(logits, labels)
17 changes: 10 additions & 7 deletions src/trainer.py
@@ -359,7 +359,7 @@ def _setup_training(
self._metric_infos = metric_infos
return task_infos, metric_infos

-def get_scaling_weights(self, scaling_method, num_tasks, task_names, task_n_train_example):
+def get_scaling_weights(self, scaling_method, num_tasks, task_names, task_n_train_examples):
"""
Parameters
----------------
@@ -385,7 +385,7 @@ def get_scaling_weights(self, scaling_method, num_tasks, task_names, task_n_trai
elif scaling_method == "max_inverse":
scaling_weights = 1 / task_n_train_examples
# Weighting losses based on best validation step for each task from a previous uniform run,
-# normalizd by the maximum validation step
+# normalized by the maximum validation step
# eg. 'max_epoch_9_18_1_11_18_2_14_16_1'
elif "max_epoch_" in scaling_method:
epochs = scaling_method.strip("max_epoch_").split("_")
@@ -399,13 +399,16 @@ def get_sampling_weights(self, weighting_method, num_tasks, task_n_train_example
scaling_weights = dict(zip(task_names, scaling_weights))
return scaling_weights
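
As a hedged illustration of the scaling methods documented above (a sketch, not the repository's code), the `max_inverse` case could be computed as follows, assuming hypothetical task names and training-set sizes:

```
# Sketch of "max_inverse" scaling: each task's loss weight is the inverse
# of its training-set size, so small tasks are not drowned out by large ones.
import numpy as np

task_names = ["commit", "copa", "multirc"]          # hypothetical tasks
task_n_train_examples = np.array([250, 400, 5100])  # hypothetical sizes

scaling_weights = 1 / task_n_train_examples
scaling_weights = dict(zip(task_names, scaling_weights))
print(scaling_weights)  # {'commit': 0.004, 'copa': 0.0025, 'multirc': ...}
```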

-def get_sampling_weights(self, weighting_method, num_tasks, task_n_train_examples):
+def get_sampling_weights(
+self, weighting_method, num_tasks, task_n_train_examples, task_n_train_batches
+):
"""
Parameters
----------------
weighting_method: str, weighting method
num_tasks: int
task_n_train_examples: list of ints of number of examples per task
+task_n_train_batches: list of ints of number of batches per task
Returns
----------------
sampling weights: list of ints, to sample tasks to train on
@@ -430,6 +433,8 @@ def get_sampling_weights(self, weighting_method, num_tasks, task_n_train_example
elif "softmax_" in weighting_method: # exp(x/temp)
weighting_temp = float(weighting_method.strip("softmax_"))
sample_weights = np.exp(task_n_train_examples / weighting_temp)
+else:
+raise KeyError(f"Unknown weighting method: {weighting_method}")
return sample_weights
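
Similarly, a hedged sketch of the `softmax_` branch above (again not the repository's code), for a hypothetical temperature of 100:

```
# Sketch of "softmax_100" sampling: tasks are drawn with probability
# proportional to exp(n_examples / temperature).
import numpy as np

task_n_train_examples = np.array([250.0, 400.0, 5100.0])  # hypothetical sizes
weighting_temp = 100.0
sample_weights = np.exp(task_n_train_examples / weighting_temp)

# train() normalizes these into a sampling distribution over tasks.
normalized_sample_weights = sample_weights / sample_weights.sum()
print(normalized_sample_weights)  # heavily favors the largest task
```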

def train(
@@ -527,7 +532,7 @@ def clip_function(grad):
task_n_train_batches = np.array([task_infos[task.name]["n_tr_batches"] for task in tasks])
log.info("Training examples per task: " + str(dict(zip(task_names, task_n_train_examples))))
sample_weights = self.get_sampling_weights(
-weighting_method, len(tasks), task_n_train_examples
+weighting_method, len(tasks), task_n_train_examples, task_n_train_batches
)

normalized_sample_weights = np.array(sample_weights) / sum(sample_weights)
@@ -1033,9 +1038,7 @@ def _delete_old_checkpoints(self, phase, epoch):
if ".best_macro" not in file and "_{}.".format(epoch) not in file:
os.remove(file)

-def _save_checkpoint(
-self, training_state, phase="pretrain", new_best_macro=False, keep_all=False
-):
+def _save_checkpoint(self, training_state, phase="pretrain", new_best_macro=False):
"""
Parameters
----------
Expand Down
5 changes: 2 additions & 3 deletions tutorials/setup_tutorial.md
@@ -10,10 +10,9 @@ First off, let's make sure you've the full repository, including all the git submodules.
This project uses submodules to manage some dependencies on other research code, in particular for loading CoVe, GPT, and BERT. To make sure you get these repos when you download `jiant`, add `--recursive` to your `clone` command:

```
-git clone --recursive git@github.com:jsalt18-sentence-repl/jiant.git jiant
+git clone --branch v0.9.0 --recursive https://github.com/nyu-mll/jiant.git jiant
```

-If you already cloned and just need to get the submodules, you can run:
+This will download the full repository and load the 0.9 release of `jiant`. For the latest version, delete `--branch v0.9.0`. If you already cloned and just need to get the submodules, you can run:

```
git submodule update --init --recursive
