diff --git a/config/examples/copa_bert.conf b/config/examples/copa_bert.conf
new file mode 100644
index 000000000..0e2c14600
--- /dev/null
+++ b/config/examples/copa_bert.conf
@@ -0,0 +1,37 @@
+// An example configuration for the COPA task with BERT.
+// Run with:
+//   python main.py --config_file config/examples/copa_bert.conf
+
+// This imports the defaults, which can be overridden below.
+include "defaults.conf"
+
+// Basics
+exp_name = copa_with_bert
+list_params = 0  // Quieter logs, since we're not experimenting with new or exciting architectures.
+write_preds = test  // Write test set predictions to disk for use on SuperGLUE if desired.
+
+// Standard setup for training on a single target task
+pretrain_tasks = copa
+target_tasks = copa
+do_pretrain = 1
+do_target_task_training = 0
+do_full_eval = 1
+
+// Typical BERT base setup
+bert_model_name = bert-base-uncased
+tokenizer = bert-base-uncased
+transfer_paradigm = finetune
+classifier = log_reg
+optimizer = bert_adam
+lr = 0.00001
+sent_enc = none
+sep_embs_for_skip = 1
+max_seq_len = 512
+dropout = 0.1
+
+// Trainer setup for small tasks with BERT
+val_interval = 10
+batch_size = 12
+max_epochs = 4
+
+
diff --git a/config/examples/stilts_example.conf b/config/examples/stilts_example.conf
new file mode 100644
index 000000000..c5aa3ff23
--- /dev/null
+++ b/config/examples/stilts_example.conf
@@ -0,0 +1,39 @@
+// This config is for STILTs training [https://arxiv.org/pdf/1811.01088.pdf]
+// for BERT -> MNLI -> RTE.
+// For this example we use BERT-base.
+// Run with:
+//   python main.py --config_file config/examples/stilts_example.conf
+
+include "../defaults.conf"
+pretrain_tasks = "mnli"
+target_tasks = "rte"
+
+// Experiment configs
+do_pretrain = 1
+do_target_task_training = 1
+do_full_eval = 1
+
+batch_size = 24
+
+write_preds = "val,test"
+
+// BERT-specific parameters
+bert_embeddings_mode = "top"
+bert_fine_tune = 1
+sep_embs_for_skip = 1
+sent_enc = "none"
+classifier = log_reg  // following the BERT paper
+
+dropout = 0.1  // following the BERT paper
+optimizer = bert_adam
+max_epochs = 3
+lr = .00001
+min_lr = .0000001
+lr_patience = 4
+patience = 20
+max_vals = 10000
+transfer_paradigm = "finetune"
+
+tokenizer = "bert-base-uncased"
+bert_model_name = "bert-base-uncased"
+