# Bi_LSTM_Model.py from tooyeer/R_conll2003 (Gitee), commit "first", 2022-01-14
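"""Bi-LSTM sequence tagger for CoNLL-2003 NER.

Contains data preparation (input_data_for_model), the model definition
(create_Bi_LSTM), training with per-sentence evaluation (model_train), and
inference on a held-out test file (model_test).
"""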
import pickle

import keras
import numpy as np
from keras.models import Sequential, load_model
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, Bidirectional, LSTM, Dense, TimeDistributed, Dropout
from keras.utils import np_utils
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, f1_score, confusion_matrix

# Only needed if the CRF output variant in create_Bi_LSTM is re-enabled:
# from keras_contrib.layers.crf import CRF

from load_data import CONSTANTS, load_data, data_processing
def input_data_for_model(input_shape, path=None):
    # Load the raw data (from `path` if given, otherwise the default training file).
    input_data = load_data(path) if path is not None else load_data()
    # Preprocess and build the dictionaries on disk.
    data_processing()
    # Load the dictionaries.
    with open(CONSTANTS[1], 'rb') as f:
        word_dictionary = pickle.load(f)
    with open(CONSTANTS[2], 'rb') as f:
        inverse_word_dictionary = pickle.load(f)
    with open(CONSTANTS[3], 'rb') as f:
        label_dictionary = pickle.load(f)
    with open(CONSTANTS[4], 'rb') as f:
        output_dictionary = pickle.load(f)
    vocab_size = len(word_dictionary)
    label_size = len(label_dictionary)

    # Group (word, tag) pairs by sentence.
    aggregate_function = lambda df: list(zip(df['word'].values.tolist(),
                                             df['tag'].values.tolist()))
    grouped_input_data = input_data.groupby('sent_no').apply(aggregate_function)
    sentences = list(grouped_input_data)
    # Map words and labels to integer ids and pad every sentence to `input_shape`.
    x = [[word_dictionary[word] for word, _ in sent] for sent in sentences]
    x = pad_sequences(maxlen=input_shape, sequences=x, padding='post', value=0)
    y = [[label_dictionary[tag] for _, tag in sent] for sent in sentences]
    y = pad_sequences(maxlen=input_shape, sequences=y, padding='post', value=0)
    # One-hot encode labels; class 0 is reserved for padding, hence label_size + 1.
    y = [np_utils.to_categorical(label, num_classes=label_size + 1) for label in y]
    return x, y, output_dictionary, vocab_size, label_size, inverse_word_dictionary
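# Illustrative sanity check (not called anywhere in this repo): the shapes
# below follow from the padding/one-hot logic in input_data_for_model, and the
# id-range check assumes word ids start at 1, as Embedding(input_dim=vocab_size + 1)
# in create_Bi_LSTM implies.
def _check_data_shapes(input_shape=40):
    x, y, _, vocab_size, label_size, _ = input_data_for_model(input_shape)
    assert x.shape[1] == input_shape                    # every sentence padded to input_shape
    assert y[0].shape == (input_shape, label_size + 1)  # one-hot incl. the padding class 0
    assert x.max() <= vocab_size                        # word ids fit the embedding table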
def draw_train(history):
    # Plot training & validation accuracy values
    # (use the 'crf_viterbi_accuracy' keys instead if the CRF layer is enabled)
    plt.plot(history.history['accuracy'], 'r-')
    plt.plot(history.history['val_accuracy'], 'b:')
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

    # Plot training & validation loss values
    plt.plot(history.history['loss'], 'r-')
    plt.plot(history.history['val_loss'], 'b:')
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()
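# Note: plt.show() needs an interactive backend; on a headless machine an
# alternative is to save each figure instead, e.g. plt.savefig('accuracy.png')
# followed by plt.close().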
# Define the deep learning model: Bi-LSTM
def create_Bi_LSTM(vocab_size, label_size, input_shape, output_dim, n_units, out_act, activation):
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size + 1,
                        output_dim=output_dim,
                        input_length=input_shape,
                        trainable=True,
                        mask_zero=True))
    model.add(Bidirectional(LSTM(units=n_units, activation=activation,
                                 return_sequences=True)))
    model.add(TimeDistributed(Dense(512, activation='relu')))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(Dense(512, activation='relu')))
    model.add(Bidirectional(LSTM(units=n_units, activation=activation,
                                 return_sequences=True)))
    model.add(TimeDistributed(Dense(label_size + 1, activation=out_act)))
    # Build the optimizer and pass it to compile so the learning rate is applied.
    adam = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                                 epsilon=None, decay=0.0, amsgrad=False)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    # CRF output-layer variant (requires keras_contrib):
    # crf_layer = CRF(label_size + 1, sparse_target=True)
    # model.add(crf_layer)
    # model.compile('adam', loss=crf_layer.loss_function, metrics=[crf_layer.accuracy])
    keras.utils.plot_model(model, 'LSTM.png', show_shapes=True)
    model.summary()
    return model
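# Minimal smoke test for the architecture (not called anywhere in this repo;
# the sizes here are placeholders, not values used elsewhere):
def _smoke_test_model():
    model = create_Bi_LSTM(vocab_size=100, label_size=9, input_shape=40,
                           output_dim=128, n_units=128,
                           out_act='softmax', activation='selu')
    # One dummy sentence of word indices; the output is a per-timestep
    # distribution over label_size + 1 classes.
    probs = model.predict(np.ones((1, 40)))
    assert probs.shape == (1, 40, 10)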
# Model training
def model_train():
    # Split the data into training and test sets at a 9:1 ratio.
    input_shape = 40
    x, y, output_dictionary, vocab_size, label_size, inverse_word_dictionary = input_data_for_model(input_shape)
    train_end = int(len(x) * 0.9)
    train_x, train_y = x[0:train_end], np.array(y[0:train_end])
    test_x, test_y = x[train_end:], np.array(y[train_end:])

    # Model hyperparameters
    activation = 'selu'
    out_act = 'softmax'
    n_units = 128
    batch_size = 186
    epochs = 10
    output_dim = 128

    # Train the model.
    lstm_model = create_Bi_LSTM(vocab_size, label_size, input_shape, output_dim, n_units, out_act, activation)
    history = lstm_model.fit(train_x, train_y,
                             validation_split=0.2,
                             epochs=epochs,
                             batch_size=batch_size,
                             verbose=1)
    print(history.history)
    draw_train(history)

    # Save the model.
    # model_save_path = CONSTANTS[0]
    # lstm_model.save(model_save_path)
    keras.utils.plot_model(lstm_model, 'LSTM.png', show_shapes=True)

    # Evaluate the test set one sentence at a time.
    N = test_x.shape[0]
    avg_accuracy = 0  # running sum for the average prediction accuracy
    true_labels, pred_labels = [], []
    for start in range(N):
        end = start + 1
        y_predict = lstm_model.predict(test_x[start:end])
        scores = lstm_model.evaluate(test_x[start:end], test_y[start:end])
        avg_accuracy += scores[1]
        print('Test Accuracy: loss = %0.6f accuracy = %0.2f%%' % (scores[0], scores[1] * 100))
        # Collect true/predicted label ids, stopping at the first padding position.
        for i in range(len(y_predict[0])):
            true_id = np.argmax(test_y[start][i])
            if true_id == 0:
                break
            true_labels.append(true_id)
            pred_labels.append(np.argmax(y_predict[0][i]))
    print('Average test accuracy: %0.2f%%' % (avg_accuracy / N * 100))

    print("Confusion matrix")
    print(confusion_matrix(true_labels, pred_labels))
    print("Classification report")
    print(classification_report(true_labels, pred_labels))
    print("micro f1:", f1_score(true_labels, pred_labels, average='micro'))
    print("macro f1:", f1_score(true_labels, pred_labels, average='macro'))
    print("weighted f1:", f1_score(true_labels, pred_labels, average='weighted'))
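# Illustrative alternative (not called anywhere in this repo): evaluate the
# whole test set in one vectorized pass instead of sentence by sentence.
# Assumes, as in model_train above, that label id 0 marks padding.
def _evaluate_batched(model, test_x, test_y):
    y_pred = model.predict(test_x)         # (N, input_shape, label_size + 1)
    true_ids = np.argmax(test_y, axis=-1)  # integer label per timestep
    pred_ids = np.argmax(y_pred, axis=-1)
    mask = true_ids != 0                   # keep only real (non-padded) tokens
    print(classification_report(true_ids[mask], pred_ids[mask]))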
def model_test():
    # Build model inputs from the held-out CoNLL-2003 test file.
    x, y, output_dictionary, vocab_size, label_size, inverse_word_dictionary = \
        input_data_for_model(40, "D:\\conll\\data\\CoNLL-2003\\eng.testa")
    # Load the trained model and run prediction.
    model_save_path = CONSTANTS[0]
    lstm_model = load_model(model_save_path)
    y_predict = lstm_model.predict(x)
    return y_predict


if __name__ == '__main__':
    model_train()