forked from tnq177/transformers_without_tears
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathconfigurations.py
56 lines (47 loc) · 1.34 KB
/
configurations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import all_constants as ac
def base():
    """Default hyper-parameter configuration shared by all setups.

    Returns a plain dict of settings; language-pair configs (e.g. ``en2vi``)
    start from this dict and override individual entries.
    """
    # model dimensions
    config = dict(
        embed_dim=512,
        ff_dim=2048,
        num_enc_layers=6,
        num_dec_layers=6,
        num_heads=8,
    )
    # architecture switches
    config.update(
        use_bias=True,
        fix_norm=True,
        scnorm=True,
        mask_logit=True,
        pre_act=True,
        clip_grad=1.0,
    )
    # optimization, learning-rate schedule and training loop
    config.update(
        lr_scheduler=ac.NO_WU,
        warmup_steps=8000,
        lr=3e-4,
        lr_scale=1.,
        lr_decay=0.8,
        stop_lr=5e-5,
        eval_metric=ac.DEV_BLEU,
        patience=3,
        alpha=0.7,
        label_smoothing=0.1,
        batch_size=4096,
        epoch_size=1000,
        max_epochs=200,
        dropout=0.3,
        att_dropout=0.3,
        ff_dropout=0.3,
        word_dropout=0.1,
    )
    # decoding
    config.update(
        decode_method=ac.BEAM_SEARCH,
        decode_batch_size=4096,
        beam_size=4,
        max_parallel_beams=0,
        beam_alpha=0.6,
        use_rel_max_len=True,
        rel_max_len=50,
        abs_max_len=300,
        allow_empty=False,
    )
    return config
def en2vi():
    """Configuration for the En->Vi pair: base settings with a larger epoch size."""
    config = base()
    config.update(epoch_size=1500)
    return config