neal23333/albert_ner
model.py
# -*- coding: utf-8 -*-
import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib.crf import crf_log_likelihood
from tensorflow.contrib.crf import viterbi_decode
from tensorflow.contrib.layers.python.layers import initializers

import rnncell as rnn
from utils import bio_to_json, bio_to_json_v1
from albert_zh import modeling
class Model(object):
    def __init__(self, config):
        self.config = config
        self.lr = config["lr"]
        self.lstm_dim = config["lstm_dim"]
        self.num_tags = config["num_tags"]

        self.global_step = tf.Variable(0, trainable=False)
        self.best_dev_f1 = tf.Variable(0.0, trainable=False)
        self.best_test_f1 = tf.Variable(0.0, trainable=False)
        self.initializer = initializers.xavier_initializer()

        # add placeholders for the model
        self.input_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name="input_ids")
        self.input_mask = tf.placeholder(dtype=tf.int32, shape=[None, None], name="input_mask")
        self.segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name="segment_ids")
        self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Targets")
        # dropout keep prob
        self.dropout = tf.placeholder(dtype=tf.float32, name="Dropout")

        # real sequence length of each example: count of non-zero (non-padding) token ids
        used = tf.sign(tf.abs(self.input_ids))
        length = tf.reduce_sum(used, axis=1)
        self.lengths = tf.cast(length, tf.int32)
        self.batch_size = tf.shape(self.input_ids)[0]
        self.num_steps = tf.shape(self.input_ids)[-1]

        # contextual token embeddings from the ALBERT encoder
        embedding = self.bert_embedding()
        # apply dropout before feeding the lstm layer
        lstm_inputs = tf.nn.dropout(embedding, self.dropout)
        # bi-directional lstm layer
        lstm_outputs = self.biLSTM_layer(lstm_inputs, self.lstm_dim, self.lengths)
        # logits for tags
        self.logits = self.project_layer(lstm_outputs)
        # loss of the model
        self.loss = self.loss_layer(self.logits, self.lengths)

        # initialize the encoder weights from a pretrained checkpoint
        project_dir = os.path.dirname(os.path.abspath(__file__))
        # init_checkpoint = "%s/chinese_L-12_H-768_A-12/bert_model.ckpt" % project_dir
        init_checkpoint = "%s/albert_tiny/albert_model.ckpt" % project_dir
        # collect all trainable variables in the graph
        tvars = tf.trainable_variables()
        # load the pretrained ALBERT weights
        (assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        tf.compat.v1.train.init_from_checkpoint(init_checkpoint, assignment_map)
        print("**** Trainable Variables ****")
        # print the variables; those restored from the checkpoint are excluded from
        # train_vars, so the ALBERT weights stay frozen and only the BiLSTM,
        # projection and CRF parameters are updated
        train_vars = []
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            else:
                train_vars.append(var)
            print("  name = %s, shape = %s%s" % (var.name, var.shape, init_string))

        with tf.variable_scope("optimizer"):
            optimizer = self.config["optimizer"]
            if optimizer == "adam":
                self.opt = tf.compat.v1.train.AdamOptimizer(self.lr)
            else:
                raise KeyError("unsupported optimizer: %s" % optimizer)
            grads = tf.gradients(self.loss, train_vars)
            (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
            self.train_op = self.opt.apply_gradients(
                zip(grads, train_vars), global_step=self.global_step)

        # saver of the model
        self.saver = tf.compat.v1.train.Saver(tf.global_variables(), max_to_keep=5)
    def bert_embedding(self):
        # build the ALBERT encoder and use its token-level outputs as embeddings
        project_dir = os.path.dirname(os.path.abspath(__file__))
        bert_config = modeling.BertConfig.from_json_file(
            "%s/albert_tiny/albert_config_tiny.json" % project_dir)  # path to the model config file
        model = modeling.BertModel(
            config=bert_config,
            is_training=True,
            input_ids=self.input_ids,
            input_mask=self.input_mask,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=False)
        # [batch_size, num_steps, hidden_size] output of the last encoder layer
        embedding = model.get_sequence_output()
        return embedding

    def conv_layer(self):
        pass
    def biLSTM_layer(self, lstm_inputs, lstm_dim, lengths, name=None):
        """
        :param lstm_inputs: [batch_size, num_steps, emb_size]
        :return: [batch_size, num_steps, 2*lstm_dim]
        """
        with tf.variable_scope("char_BiLSTM" if not name else name):
            lstm_cell = {}
            for direction in ["forward", "backward"]:
                with tf.variable_scope(direction):
                    lstm_cell[direction] = rnn.CoupledInputForgetGateLSTMCell(
                        lstm_dim,
                        use_peepholes=True,
                        initializer=self.initializer,
                        state_is_tuple=True)
            outputs, final_states = tf.nn.bidirectional_dynamic_rnn(
                lstm_cell["forward"],
                lstm_cell["backward"],
                lstm_inputs,
                dtype=tf.float32,
                sequence_length=lengths)
        return tf.concat(outputs, axis=2)
    def project_layer(self, lstm_outputs, name=None):
        """
        hidden layer between lstm layer and logits
        :param lstm_outputs: [batch_size, num_steps, 2*lstm_dim]
        :return: [batch_size, num_steps, num_tags]
        """
        with tf.variable_scope("project" if not name else name):
            with tf.variable_scope("hidden"):
                W = tf.compat.v1.get_variable("W", shape=[self.lstm_dim * 2, self.lstm_dim],
                                              dtype=tf.float32, initializer=self.initializer)
                b = tf.compat.v1.get_variable("b", shape=[self.lstm_dim], dtype=tf.float32,
                                              initializer=tf.zeros_initializer())
                output = tf.reshape(lstm_outputs, shape=[-1, self.lstm_dim * 2])
                hidden = tf.tanh(tf.compat.v1.nn.xw_plus_b(output, W, b))

            # project to scores over the tag set
            with tf.variable_scope("logits"):
                W = tf.compat.v1.get_variable("W", shape=[self.lstm_dim, self.num_tags],
                                              dtype=tf.float32, initializer=self.initializer)
                b = tf.compat.v1.get_variable("b", shape=[self.num_tags], dtype=tf.float32,
                                              initializer=tf.zeros_initializer())
                pred = tf.compat.v1.nn.xw_plus_b(hidden, W, b)
            return tf.reshape(pred, [-1, self.num_steps, self.num_tags])
    def loss_layer(self, project_logits, lengths, name=None):
        """
        calculate crf loss
        :param project_logits: [batch_size, num_steps, num_tags]
        :return: scalar loss
        """
        with tf.variable_scope("crf_loss" if not name else name):
            small = -1000.0
            # pad logits for the crf loss: add a synthetic start tag (extra column)
            # and a synthetic first time step so the crf also learns start transitions
            start_logits = tf.concat(
                [small * tf.ones(shape=[self.batch_size, 1, self.num_tags]),
                 tf.zeros(shape=[self.batch_size, 1, 1])], axis=-1)
            pad_logits = tf.cast(small * tf.ones([self.batch_size, self.num_steps, 1]), tf.float32)
            logits = tf.concat([project_logits, pad_logits], axis=-1)
            logits = tf.concat([start_logits, logits], axis=1)
            targets = tf.concat(
                [tf.cast(self.num_tags * tf.ones([self.batch_size, 1]), tf.int32), self.targets], axis=-1)

            self.trans = tf.compat.v1.get_variable(
                "transitions",
                shape=[self.num_tags + 1, self.num_tags + 1],
                initializer=self.initializer)
            log_likelihood, self.trans = crf_log_likelihood(
                inputs=logits,
                tag_indices=targets,
                transition_params=self.trans,
                sequence_lengths=lengths + 1)
            return tf.reduce_mean(-log_likelihood)
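
    # Shape walk-through of the padding above (illustrative numbers, num_tags = 3):
    #   project_logits: [B, T, 3]    scores for the 3 real tags
    #   pad_logits:     [B, T, 1]    score -1000 for the synthetic start tag
    #   logits:         [B, T+1, 4]  one extra tag column, one extra time step
    #   targets:        [B, T+1]     position 0 holds the start tag id (= num_tags = 3)
    # Every path is forced to begin in the start state, so the learned [4, 4]
    # transition matrix also captures which real tags may open a sequence.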
    def create_feed_dict(self, is_train, batch):
        """
        :param is_train: Flag, True for a training batch
        :param batch: list of train/evaluate data
        :return: structured data to feed
        """
        _, segment_ids, chars, mask, tags = batch
        feed_dict = {
            self.input_ids: np.asarray(chars),
            self.input_mask: np.asarray(mask),
            self.segment_ids: np.asarray(segment_ids),
            self.dropout: 1.0,
        }
        if is_train:
            feed_dict[self.targets] = np.asarray(tags)
            feed_dict[self.dropout] = self.config["dropout_keep"]
        return feed_dict
    def run_step(self, sess, is_train, batch):
        """
        :param sess: session to run the batch
        :param is_train: a flag indicating whether it is a training batch
        :param batch: a batch of data as consumed by create_feed_dict
        :return: (global_step, loss) for training, (lengths, logits) otherwise
        """
        feed_dict = self.create_feed_dict(is_train, batch)
        if is_train:
            global_step, loss, _ = sess.run(
                [self.global_step, self.loss, self.train_op],
                feed_dict)
            return global_step, loss
        else:
            lengths, logits = sess.run([self.lengths, self.logits], feed_dict)
            return lengths, logits
    def decode(self, logits, lengths, matrix):
        """
        :param logits: [batch_size, num_steps, num_tags]float32, logits
        :param lengths: [batch_size]int32, real length of each sequence
        :param matrix: transition matrix for inference
        :return: decoded tag id paths, one per sequence
        """
        # infer the final labels with the Viterbi algorithm
        paths = []
        small = -1000.0
        start = np.asarray([[small] * self.num_tags + [0]])
        for score, length in zip(logits, lengths):
            score = score[:length]
            pad = small * np.ones([length, 1])
            padded_score = np.concatenate([score, pad], axis=1)
            padded_score = np.concatenate([start, padded_score], axis=0)
            path, _ = viterbi_decode(padded_score, matrix)
            paths.append(path[1:])
        return paths
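
    # Note: decoding mirrors the training-time padding in loss_layer. Each score
    # matrix gets the extra start-tag column and a synthetic start row before
    # viterbi_decode runs over the [num_tags + 1, num_tags + 1] transition matrix;
    # path[1:] then drops the synthetic start step so the returned path lines up
    # with the input tokens again.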
    def evaluate(self, sess, data_manager, id_to_tag):
        """
        :param sess: session to run the model
        :param data_manager: data manager that yields evaluation batches
        :param id_to_tag: index to tag name
        :return: evaluate result
        """
        results = []
        trans = self.trans.eval()
        for batch in data_manager.iter_batch():
            strings = batch[0]
            labels = batch[-1]
            lengths, scores = self.run_step(sess, False, batch)
            batch_paths = self.decode(scores, lengths, trans)
            for i in range(len(strings)):
                result = []
                string = strings[i][:lengths[i]]
                gold = [id_to_tag[int(x)] for x in labels[i][1:lengths[i]]]
                pred = [id_to_tag[int(x)] for x in batch_paths[i][1:lengths[i]]]
                for char, g, p in zip(string, gold, pred):
                    result.append(" ".join([char, g, p]))
                results.append(result)
        return results
    def evaluate_lines(self, sess, inputs, id_to_tag):
        tags = []
        strings = inputs[0]
        trans = self.trans.eval(sess)
        lengths, scores = self.run_step(sess, False, inputs)
        batch_paths = self.decode(scores, lengths, trans)
        for i in range(len(strings)):
            pred = [id_to_tag[int(x)] for x in batch_paths[i]]
            tags.append(pred)
        return bio_to_json_v1(strings, tags)
    def evaluate_line(self, sess, inputs, id_to_tag):
        trans = self.trans.eval(sess)
        lengths, scores = self.run_step(sess, False, inputs)
        batch_paths = self.decode(scores, lengths, trans)
        tags = [id_to_tag[idx] for idx in batch_paths[0]]
        return bio_to_json(inputs[0], tags[1:-1])
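
For reference, the sketch below shows how this class might be driven for a single training step. It is a minimal illustration, not code from the repository: the config keys and the batch layout are taken from __init__ and create_feed_dict, but the concrete values (learning rate, tag count, toy token and tag ids) are assumptions, and rnncell.py, utils.py and the albert_tiny config/checkpoint must be present as the constructor expects.

# Minimal usage sketch (TF 1.x). Values below are illustrative assumptions.
import tensorflow as tf
from model import Model

config = {
    "lr": 0.001,          # Adam learning rate
    "lstm_dim": 128,      # hidden size per LSTM direction
    "num_tags": 7,        # e.g. BIO tags for 3 entity types plus "O"
    "optimizer": "adam",
    "dropout_keep": 0.5,  # keep probability applied during training
}

with tf.Session() as sess:
    model = Model(config)  # builds the graph and restores the ALBERT weights
    sess.run(tf.global_variables_initializer())

    # A batch is (strings, segment_ids, input_ids, input_mask, tags); the token
    # and tag ids here are placeholders for real WordPiece / tag vocabulary ids.
    batch = (
        [["[CLS]", "李", "明", "[SEP]"]],  # strings
        [[0, 0, 0, 0]],                    # segment_ids
        [[101, 2769, 3209, 102]],          # input_ids
        [[1, 1, 1, 1]],                    # input_mask
        [[0, 1, 2, 0]],                    # gold tag ids
    )
    step, loss = model.run_step(sess, True, batch)
    print("step %d, loss %.4f" % (step, loss))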