Skip to content

Commit 59099d3

Browse files
committed
release
1 parent 0d0ea24 commit 59099d3

File tree

115 files changed

+31474
-0
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

115 files changed

+31474
-0
lines changed

bash/amr_preprocess.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/bin/bash
2+
3+
# Absolute directory containing this script. The inner substitution is quoted
# so that a checkout path containing whitespace is passed to `cd` as one word.
CURRENT_PATH=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
4+
5+
echo '######## converting mrp to amr ...' >&2
6+
# $1 is the input file; the converted output is written next to it as $1.amr.txt.
# All expansions are quoted so paths with spaces or glob characters stay intact.
python3 "${CURRENT_PATH}/../utils/mrp_to_amr.py" -i "$1" -o "$1.amr.txt" --not_amr_str_only --all_nodes
7+
8+
# Location of the JAMR checkout (its scripts/ALIGN.sh is invoked below).
# A value already exported by the caller wins; otherwise the placeholder
# path/to/jamr is used and must be edited before running.
export JAMR_HOME="${JAMR_HOME:-path/to/jamr}"
9+
# Location of cdec, exported for child processes (presumably read by the JAMR
# scripts -- confirm). A value already exported by the caller wins; otherwise
# the placeholder path/to/cdec is used and must be edited before running.
export CDEC="${CDEC:-path/to/cdec}"
10+
11+
echo '######## running jamr rule based aligner ...' >&2
12+
# Feed the converted AMR file to JAMR's aligner on stdin and capture its stdout.
# Command path and redirection targets are quoted to survive spaces in paths.
"${JAMR_HOME}/scripts/ALIGN.sh" < "$1.amr.txt" > "$1.jalign.txt"
13+
14+
# Directory holding the bundled toolkit, located relative to this script's directory.
TOOLKIT_HOME="${CURRENT_PATH}/../toolkit"
15+
16+
echo '######## running tamr rule based aligner ...' >&2
17+
# Run the TAMR rule-based aligner on the JAMR output. $2 is passed as the
# -wordvec argument. Expansions are quoted so each path is a single argument.
python3 "${TOOLKIT_HOME}/tamr_aligner/rule_base_align.py" -verbose -data "$1.jalign.txt" -output "$1.alignment.txt" -wordvec "$2" -trials 10000 -improve_perfect -morpho_match -semantic_match
18+
19+
echo '######## refreshing alignments ...' >&2
20+
# Combine the TAMR alignment output (-lexicon) with the JAMR-aligned data and
# write the result to $1.new_aligned.txt. Expansions are quoted against word splitting.
python3 "${TOOLKIT_HOME}/tamr_aligner/refresh_alignments.py" -lexicon "$1.alignment.txt" -data "$1.jalign.txt" > "$1.new_aligned.txt"
21+
22+
echo '######## generating oracles ...' >&2
23+
# Dump the oracle output for the refreshed alignments into $1.actions.txt.
# Expansions are quoted so paths with spaces are not split.
python3 "${TOOLKIT_HOME}/tamr_aligner/eager_oracle.py" -mod dump -aligned "$1.new_aligned.txt" > "$1.actions.txt"
24+
25+
echo '######## adding extra mrp information ...' >&2
26+
# Augment the actions file using the original input file ($1); the result,
# $1.actions.aug.txt, is the final output of this script. Expansions are quoted.
python3 "${TOOLKIT_HOME}/amr_add_extra.py" -i "$1.actions.txt" -o "$1.actions.aug.txt" -e mrp "$1"

bash/predict.sh

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/bin/bash
2+
3+
# examples of prediction commands
4+
5+
# DM
6+
CUDA_VISIBLE_DEVICES=0 \
7+
allennlp predict \
8+
--cuda-device 0 \
9+
--output-file dm-output.mrp \
10+
--predictor transition_predictor_sdp \
11+
--include-package utils \
12+
--include-package modules \
13+
--use-dataset-reader \
14+
--batch-size 32 \
15+
--silent \
16+
checkpoints/dm_bert \
17+
data/dm-test.mrp
18+
19+
20+
# PSD
21+
CUDA_VISIBLE_DEVICES=0 \
22+
allennlp predict \
23+
--cuda-device 0 \
24+
--output-file psd-output.mrp \
25+
--predictor transition_predictor_sdp \
26+
--include-package utils \
27+
--include-package modules \
28+
--use-dataset-reader \
29+
--batch-size 32 \
30+
--silent \
31+
checkpoints/psd_bert \
32+
data/psd-test.mrp
33+
34+
35+
# EDS
36+
CUDA_VISIBLE_DEVICES=0 \
37+
allennlp predict \
38+
--cuda-device 0 \
39+
--output-file eds-output.mrp \
40+
--predictor transition_predictor_eds \
41+
--include-package utils \
42+
--include-package modules \
43+
--use-dataset-reader \
44+
--batch-size 32 \
45+
--silent \
46+
checkpoints/eds_bert \
47+
data/eds-test.mrp
48+
49+
50+
# UCCA
51+
CUDA_VISIBLE_DEVICES=0 \
52+
allennlp predict \
53+
--cuda-device 0 \
54+
--output-file ucca-output.mrp \
55+
--predictor transition_predictor_ucca \
56+
--include-package utils \
57+
--include-package modules \
58+
--use-dataset-reader \
59+
--batch-size 32 \
60+
--silent \
61+
checkpoints/ucca_bert \
62+
data/ucca-test.mrp
63+
64+
65+
# AMR
66+
# !!! The AMR parser expects input in the augmented AMR format, not the MRP format !!!
67+
CUDA_VISIBLE_DEVICES=0 \
68+
allennlp predict \
69+
--cuda-device 0 \
70+
--output-file amr-output.mrp \
71+
--predictor transition_amr_predictor \
72+
--include-package utils \
73+
--include-package modules \
74+
--use-dataset-reader \
75+
--batch-size 32 \
76+
--silent \
77+
checkpoints/amr_bert \
78+
data/amr-test.txt

bash/train.sh

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/bin/bash
2+
3+
# examples of training commands
4+
5+
# DM
6+
CUDA_VISIBLE_DEVICES=0 \
7+
TRAIN_PATH=data/dm-train.mrp \
8+
DEV_PATH=data/dm-dev.mrp \
9+
BERT_PATH=bert/wwm_cased_L-24_H-1024_A-16 \
10+
WORD_DIM=1024 \
11+
LOWER_CASE=FALSE \
12+
BATCH_SIZE=4 \
13+
allennlp train \
14+
-s checkpoints/dm_bert \
15+
--include-package utils \
16+
--include-package modules \
17+
--file-friendly-logging \
18+
config/transition_bert_sdp_conll.jsonnet
19+
20+
21+
# PSD
22+
CUDA_VISIBLE_DEVICES=0 \
23+
TRAIN_PATH=data/psd-train.mrp \
24+
DEV_PATH=data/psd-dev.mrp \
25+
BERT_PATH=bert/wwm_cased_L-24_H-1024_A-16 \
26+
WORD_DIM=1024 \
27+
LOWER_CASE=FALSE \
28+
BATCH_SIZE=4 \
29+
allennlp train \
30+
-s checkpoints/psd_bert \
31+
--include-package utils \
32+
--include-package modules \
33+
--file-friendly-logging \
34+
config/transition_bert_sdp_conll.jsonnet
35+
36+
37+
# EDS
38+
CUDA_VISIBLE_DEVICES=0 \
39+
TRAIN_PATH=data/eds-train.mrp \
40+
DEV_PATH=data/eds-dev.mrp \
41+
BERT_PATH=bert/wwm_cased_L-24_H-1024_A-16 \
42+
WORD_DIM=1024 \
43+
LOWER_CASE=FALSE \
44+
BATCH_SIZE=4 \
45+
allennlp train \
46+
-s checkpoints/eds_bert \
47+
--include-package utils \
48+
--include-package modules \
49+
--file-friendly-logging \
50+
config/transition_bert_eds.jsonnet
51+
52+
53+
# UCCA
54+
CUDA_VISIBLE_DEVICES=0 \
55+
TRAIN_PATH=data/ucca-train.mrp \
56+
DEV_PATH=data/ucca-dev.mrp \
57+
BERT_PATH=bert/wwm_cased_L-24_H-1024_A-16 \
58+
WORD_DIM=1024 \
59+
LOWER_CASE=FALSE \
60+
BATCH_SIZE=4 \
61+
allennlp train \
62+
-s checkpoints/ucca_bert \
63+
--include-package utils \
64+
--include-package modules \
65+
--file-friendly-logging \
66+
config/transition_bert_ucca.jsonnet
67+
68+
69+
# AMR
70+
# !!! The AMR parser expects input in the augmented AMR format (the *.actions.aug.txt files produced by bash/amr_preprocess.sh), not the MRP format !!!
71+
CUDA_VISIBLE_DEVICES=0 \
72+
TRAIN_PATH=data/amr-train.mrp.actions.aug.txt \
73+
DEV_PATH=data/amr-dev.mrp.actions.aug.txt \
74+
BERT_PATH=bert/wwm_cased_L-24_H-1024_A-16 \
75+
WORD_DIM=1024 \
76+
LOWER_CASE=FALSE \
77+
BATCH_SIZE=4 \
78+
allennlp train \
79+
-s checkpoints/amr_bert \
80+
--include-package utils \
81+
--include-package modules \
82+
--file-friendly-logging \
83+
config/transition_bert_amr.jsonnet

config/transition_bert_amr.jsonnet

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"vocabulary": {
3+
"non_padded_namespaces": []
4+
},
5+
"dataset_reader": {
6+
"type": "amr_list-based_arc-eager",
7+
"token_indexers": {
8+
"tokens": {
9+
"type": "bert-pretrained",
10+
"pretrained_model": std.extVar('BERT_PATH'),
11+
"do_lowercase": std.extVar('LOWER_CASE')=='TRUE'
12+
}
13+
}
14+
},
15+
"train_data_path": std.extVar('TRAIN_PATH'),
16+
"validation_data_path": std.extVar('DEV_PATH'),
17+
"model": {
18+
"type": "transition_parser_amr",
19+
"eval_on_training": false,
20+
"text_field_embedder": {
21+
"tokens": {
22+
"type": "bert-pretrained",
23+
"pretrained_model": std.extVar('BERT_PATH'),
24+
"requires_grad": true,
25+
"top_layer_only": false
26+
},
27+
"embedder_to_indexer_map": {
28+
"tokens": ["tokens", "tokens-offsets", "tokens-type-ids"]
29+
},
30+
"allow_unmatched_keys": true
31+
},
32+
"word_dim": std.parseInt(std.extVar('WORD_DIM')),
33+
"hidden_dim": 256,
34+
"action_dim": 128,
35+
"entity_dim": 64,
36+
"rel_dim": 64,
37+
"num_layers": 2,
38+
"recurrent_dropout_probability": 0.2,
39+
"layer_dropout_probability": 0.2,
40+
"same_dropout_mask_per_instance": true,
41+
"input_dropout": 0.2,
42+
"initializer": [
43+
["p_.*weight", {"type": "xavier_uniform"}],
44+
["p_.*bias", {"type": "zero"}],
45+
["p(root|empty)_.*_emb", {"type": "normal"}],
46+
]
47+
},
48+
"iterator": {
49+
"type": "bucket",
50+
"sorting_keys": [["tokens", "num_tokens"]],
51+
"batch_size": std.parseInt(std.extVar('BATCH_SIZE'))
52+
},
53+
"trainer": {
54+
"num_epochs": 50,
55+
"grad_norm": 5.0,
56+
"grad_clipping": 5.0,
57+
"patience": 50,
58+
"cuda_device": 0,
59+
"validation_metric": "+all-f",
60+
"optimizer": {
61+
"type": "adam",
62+
"parameter_groups": [
63+
[[".*bert.*"], {"lr": 5e-5}],
64+
[["^((?!bert).)*$"], {}]
65+
],
66+
"betas": [0.9, 0.999],
67+
"lr": 1e-3
68+
},
69+
"learning_rate_scheduler": {
70+
"type": "slanted_triangular",
71+
"num_epochs": 50,
72+
"num_steps_per_epoch": 1000,
73+
"cut_frac": 0.1,
74+
"ratio": 32,
75+
"gradual_unfreezing": true,
76+
"discriminative_fine_tuning": true,
77+
"decay_factor": 1.0,
78+
},
79+
"num_serialized_models_to_keep": 50
80+
}
81+
}

0 commit comments

Comments
 (0)