代码拉取完成,页面将自动刷新
[Transformer]
bert = 'bert-base-multilingual-cased'
n_bert_layers = 0 # 0 means all
use_hidden_states = True
use_attentions = True
attention_head = 0
attention_layer = 6
bert_fine_tune = False
[Features]
n_word_embed = 0 # embeddings for words
n_char_embed = 50
n_feat_embed = 0
[Network]
mix_dropout = .1
embed_dropout = .33
n_lstm_hidden = 400
n_lstm_layers = 2
lstm_dropout = .33
token_dropout = 0.0
n_mlp_arc = 500
n_mlp_rel = 100
mlp_dropout = .33
[Optimizer]
optimizer = 'adam' # or 'adamw'
lr = 2e-3 # Li uses 1.2e-3
mu = .9
nu = .98 # sometimes 0.9 is better
epsilon = 1e-12 # Li uses 1e-6
clip = 5.0
decay = .75
decay_steps = 5000
accumulation_steps = 1
evaluate_in_training = True
warmup_steps_ratio = .1
[Run]
batch_size = 5000
epochs = 1000
patience = 20
min_freq = 2
fix_len = 20
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。