# model_upgraded.py
from math import factorial, pi, pow, log
import numpy as np
import tensorflow as tf
from preprocess import standardize_kpi, complete_timestamp
tf.compat.v1.disable_eager_execution()
tf.compat.v1.reset_default_graph()
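# The model below is built as a TF1-style static graph and executed through a Session,
# hence eager execution is disabled and the default graph is reset at import time.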
class Buzz():
def __init__(self, z_dim):
self._z_dim = z_dim
@property
def z_dim(self):
return self._z_dim
# `x\in [-10, 10]`
def build_q_net(self, x, outdimension=64):
# x->z
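        # Encoder / variational net q(z|x): a standardized window of length 128 is
        # reshaped into an 8x16 "image", passed through 4 conv layers, and mapped to
        # the posterior mean and (softplus) std of the latent code.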
with tf.compat.v1.variable_scope('build_q_net', reuse=tf.compat.v1.AUTO_REUSE):
lz_x = tf.reshape(x, [-1, 8, 16, 1])
# 4 conv layers
            lz_x = conv(lz_x, outdimension * 4, 'conv1', stride_size=2, activation_func=tf.nn.leaky_relu)
lz_x = conv(lz_x, outdimension * 2, 'conv2', activation_func=tf.nn.leaky_relu)
lz_x = conv(lz_x, outdimension * 2, 'conv3', stride_size=2, activation_func=tf.nn.leaky_relu)
lz_x = conv(lz_x, outdimension, 'conv4', activation_func=tf.nn.leaky_relu)
            # get z_mean and z_std from fully connected layers
            # shape [batch_size, z_dimension]
with tf.compat.v1.name_scope('ZMEAN'):
z_mean = fc(lz_x, self._z_dim, 'z_mean', activation_func=None)
# z_mean = tf.layers.batch_normalization(z_mean)
with tf.compat.v1.name_scope('ZSTD'):
z_std = fc(lz_x, self._z_dim, 'z_std', activation_func=tf.nn.softplus)
# shape [batch_size, z_dimension]
VVarList = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, 'build_q_net')
return z_mean, z_std, VVarList
def genModel(self, z, in_dimension=64, output_num=128):
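        # Decoder / generative net p(x|z): a fully connected layer followed by
        # 4 transposed-conv layers, flattened back to a window of length output_num.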
with tf.compat.v1.variable_scope('genModel', reuse=tf.compat.v1.AUTO_REUSE):
batch_size = z.get_shape().as_list()[0]
            # fully connected layer
lx_z = fc(z, 512, 'fc', activation_func=None)
# reshape and deconv layers
lx_z = tf.reshape(lx_z, [-1, 2, 4, in_dimension])
lx_z = deconv(lx_z, in_dimension * 2, 'deconv1', activation_func=tf.nn.leaky_relu)
lx_z = deconv(lx_z, in_dimension * 2, 'deconv2', stride_size=2, activation_func=tf.nn.leaky_relu)
lx_z = deconv(lx_z, in_dimension * 4, 'deconv3', activation_func=tf.nn.leaky_relu)
lx_z = deconv(lx_z, 1, 'deconv4', stride_size=2, activation_func=tf.nn.leaky_relu)
resShape = lx_z.get_shape().as_list()
lx_z = tf.reshape(lx_z, [batch_size, resShape[1] * resShape[2] * resShape[3]])
lx_z = fc(lx_z, output_num, 'deconvOut', activation_func=None)
# lx_z = tf.clip_by_value(lx_z, -10, 10)
GVarList = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, 'genModel')
return lx_z, GVarList
# chain the generative model and q_net
def autoEncoder(self, x, outDimension=64):
with tf.compat.v1.name_scope('Variational_Net'):
mu, sigma, VVarList = self.build_q_net(x, outDimension)
with tf.compat.v1.name_scope('Z_GenNetInput'):
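            # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, I),
            # so the sampling step stays differentiable w.r.t. mu and sigma.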
z = mu + sigma * tf.random.normal(tf.shape(mu), 0, 1, dtype=tf.float32)
with tf.compat.v1.name_scope('GenerationNet'):
Xhat, GVarList = self.genModel(z, outDimension)
with tf.compat.v1.name_scope('KL_divergence'):
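            # Closed-form negative KL between N(mu, sigma^2) and N(0, I):
            # -KL = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2), averaged over the batch.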
negKL = 0.5 * tf.reduce_mean(tf.reduce_sum(1 + tf.math.log(1e-8 + tf.square(sigma)) - \
tf.square(mu) - tf.square(sigma), 1))
        # Xhat: shape [batch_size, window_size]; negKL: scalar
return Xhat, negKL, VVarList, GVarList
def discNet(self, x, outdimension=32):
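        # Critic / discriminator: the same conv stack as the encoder, reduced to a
        # single scalar score per window by a final fully connected layer.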
with tf.compat.v1.variable_scope('discNet', reuse=tf.compat.v1.AUTO_REUSE):
lz_x = tf.reshape(x, [-1, 8, 16, 1])
# 4 conv layers
lz_x = conv(lz_x, outdimension * 4, 'conv1', stride_size=2, activation_func=tf.nn.leaky_relu)
lz_x = conv(lz_x, outdimension * 2, 'conv2', activation_func=tf.nn.leaky_relu)
lz_x = conv(lz_x, outdimension * 2, 'conv3', stride_size=2, activation_func=tf.nn.leaky_relu)
lz_x = conv(lz_x, outdimension, 'conv4', activation_func=tf.nn.leaky_relu)
dis = fc(lz_x, 1, 'dis', activation_func=None)
DVarList = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, 'discNet')
# shape [batch_size, 1]
return dis, DVarList
def getLoss(self, x, eta, window_size=128):
Xhat, negKL, VVarList, GVarList = self.autoEncoder(x)
# with tf.variable_scope('DisX'):
with tf.compat.v1.name_scope('DisX'):
dis1, DVarList = self.discNet(x)
# with tf.variable_scope('DisGenZ'):
with tf.compat.v1.name_scope('DisGenZ'):
dis2, DVarList = self.discNet(Xhat)
with tf.compat.v1.name_scope('wgan_loss'):
wgan_loss = tf.reduce_mean(dis1 - dis2) # real - fake
# get gradient penalty
batch_size = x.get_shape().as_list()[0]
kxi = tf.random.uniform(shape=tf.shape(Xhat))
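        # WGAN-GP style gradient penalty: score a random interpolation between the
        # real window x and the reconstruction Xhat, and push its gradient norm towards 1.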
with tf.compat.v1.name_scope('hatX'):
Xnew = kxi * (x - Xhat) + Xhat
with tf.compat.v1.name_scope('get_gradient_penalty'):
disres, _ = self.discNet(Xnew)
gradients = tf.gradients(disres, [Xnew])[0]
# gradients = tf.clip_by_value(gradients, -10, 10)
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=1))
gradientPenalty = tf.reduce_mean((slopes - 1.) ** 2) # shape (1)
# vloss = |wgan_loss| - eta * GP
with tf.compat.v1.variable_scope('vloss'):
vloss = tf.math.abs(wgan_loss) - eta * gradientPenalty
# wloss = -la * |wgan_loss| - KL - logZ
with tf.compat.v1.variable_scope('omegaLoss', reuse=tf.compat.v1.AUTO_REUSE):
la = tf.compat.v1.get_variable('lambda', shape=(), initializer=tf.compat.v1.constant_initializer(0.5))
la = tf.clip_by_value(la, 1e-3, 10)
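            # logZ: log of the normalizing constant in the Buzz objective; as coded,
            # it depends only on the window size and the learned scale la.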
logZ = log(factorial(window_size) / factorial(int(window_size / 2)) * 2 * pow(pi, window_size / 2)) \
- window_size * tf.math.log(la)
Wloss = -la * tf.math.abs(wgan_loss) + negKL - logZ
return Xhat, negKL, wgan_loss, gradientPenalty, -vloss, -Wloss, logZ, la, VVarList, GVarList, DVarList
# conv layer
def conv(inputs, output_num, name, kernel_size=5, stride_size=1, init_bias=0.0, conv_padding='SAME', stddev=0.1,
activation_func=tf.nn.relu):
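    # 2-D convolution helper: Glorot-uniform (fan_avg, uniform) weight init,
    # random-normal bias init, optional activation.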
with tf.compat.v1.variable_scope(name):
input_size = inputs.get_shape().as_list()[-1]
        # construct kernel and bias
conv_weights = tf.compat.v1.get_variable('weights', [kernel_size, kernel_size, input_size, output_num],
dtype=tf.float32,
initializer=tf.compat.v1.keras.initializers.VarianceScaling(scale=1.0,
mode="fan_avg",
distribution="uniform"))
conv_biases = tf.compat.v1.get_variable('biases', [output_num], dtype=tf.float32,
initializer=tf.compat.v1.initializers.random_normal(stddev=stddev))
# conv and add bias
conv_layer = tf.nn.conv2d(inputs, filters=conv_weights, strides=[1, stride_size, stride_size, 1],
padding=conv_padding)
conv_layer = tf.nn.bias_add(conv_layer, conv_biases)
# if batch_normalization:
# conv_layer = tf.contrib.layers.batch_norm(conv_layer)
# activation function
if activation_func:
conv_layer = activation_func(conv_layer)
return conv_layer
def deconv(inputs, output_num, name, kernel_size=5, stride_size=1, deconv_padding='SAME',
           std_dev=0.1, activation_func=tf.nn.relu):
    with tf.compat.v1.variable_scope(name):
        deconv_layer = tf.compat.v1.layers.conv2d_transpose(inputs, output_num, kernel_size, stride_size,
                                                            padding=deconv_padding, name=name)
deconv_bias = tf.compat.v1.get_variable('biases', [output_num], dtype=tf.float32,
initializer=tf.compat.v1.initializers.random_normal(stddev=std_dev))
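        # Note: conv2d_transpose above already adds its own bias by default, so this
        # bias_add applies a second, explicitly created offset on top of it.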
deconv_layer = tf.nn.bias_add(deconv_layer, deconv_bias)
# if batch_normalization:
# deconv_layer = tf.contrib.layers.batch_norm(deconv_layer)
if activation_func:
deconv_layer = activation_func(deconv_layer)
return deconv_layer
# local response normalization (defined for completeness; not used elsewhere in this file)
def lrn(inputs, depth_radius=2, alpha=0.0001, beta=0.75, bias=1.0):
return tf.nn.local_response_normalization(inputs, depth_radius=depth_radius, alpha=alpha, beta=beta, bias=bias)
def fc(inputs, output_size, name, activation_func=tf.nn.relu, stddev=0.1):
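    # Fully connected helper: 4-D conv feature maps are flattened before the matmul;
    # weights and biases use random-normal initialization.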
input_shape = inputs.get_shape().as_list()
with tf.compat.v1.variable_scope(name):
# flatten the inputs and construct weight and bias
if len(input_shape) == 4:
fc_weights = tf.compat.v1.get_variable('weights',
[input_shape[1] * input_shape[2] * input_shape[3], output_size],
dtype=tf.float32,
initializer=tf.compat.v1.initializers.random_normal(stddev=stddev))
inputs = tf.reshape(inputs, [input_shape[0], fc_weights.get_shape().as_list()[0]])
else:
fc_weights = tf.compat.v1.get_variable('weights', [input_shape[-1], output_size], dtype=tf.float32,
initializer=tf.compat.v1.initializers.random_normal(stddev=stddev))
fc_biases = tf.compat.v1.get_variable('biases', [output_size], dtype=tf.float32,
initializer=tf.compat.v1.initializers.random_normal(stddev=stddev))
fc_layer = tf.matmul(inputs, fc_weights)
fc_layer = tf.nn.bias_add(fc_layer, fc_biases)
# if batch_normalization:
# fc_layer = tf.contrib.layers.batch_norm(fc_layer)
        # apply the activation to the fully connected layer
if activation_func:
fc_layer = activation_func(fc_layer)
return fc_layer
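# Reads a KPI CSV with a header row; each subsequent line holds three
# comma-separated columns: timestamp, value, label.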
def readData(fileName):
with open(fileName) as fileObj:
next(fileObj)
timestamp = []
values = []
labels = []
for line in fileObj.readlines():
tmpList = line.split(',')
timestamp.append(int(tmpList[0]))
values.append(float(tmpList[1]))
labels.append(int(tmpList[2]))
return timestamp, values, labels
if __name__ == "__main__":
'''
parameters
'''
z_dimension = 13
eta = 10 # gp weight
window_size = 128
n_critic = 3 # number of critic iterations
s0 = 32 # initial neighborhood size
b0 = 8 # initial batch size
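    # s0 halves and b0 doubles every 40 epochs (see the end of the training loop),
    # so each graph feed always contains s0 * b0 = 256 windows.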
# parameters for adam optimizer
alpha0 = 1e-3
beta1 = 0.9
beta2 = 0.999
# get Data
fileName = './cpu4.csv'
timestamp, values, labels = readData(fileName)
# labels = np.zeros_like(values, dtype=np.int32)
# complete the timestamp and obtain the missing point indicators
timestamp, missing, (values, labels) = complete_timestamp(timestamp, (values, labels))
# split data into training and testing data
test_portion = 0.3
test_n = int(len(values) * test_portion)
train_values, test_values = values[:-test_n], values[-test_n:]
train_labels, test_labels = labels[:-test_n], labels[-test_n:]
train_missing, test_missing = missing[:-test_n], missing[-test_n:]
# standardize the training and testing data and clip the data [-10, 10]
    train_values, mean, std = standardize_kpi(
        train_values, excludes=np.logical_or(train_labels, train_missing))
test_values, _, _ = standardize_kpi(test_values, mean=mean, std=std)
# build computation graph
x = tf.compat.v1.placeholder(tf.float32, shape=[s0 * b0, window_size], name='inputData')
DisOptimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=alpha0, beta1=beta1, beta2=beta2)
GenOptimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=alpha0, beta1=beta1, beta2=beta2)
model = Buzz(z_dimension)
    Xhat, negKL, wgan_loss, gradientPenalty, vloss, Wloss, logZ, la, VVarList, GVarList, DVarList = \
        model.getLoss(x, eta)
VVarList.extend(GVarList)
VVarList.append(la)
Dis_loss = DisOptimizer.minimize(vloss, var_list=VVarList)
Gen_loss = GenOptimizer.minimize(Wloss, var_list=DVarList)
trainDataLen = len(train_values)
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.compat.v1.Session(config=config)
# sess = tf.Session()
sess.run(tf.compat.v1.global_variables_initializer())
# writer = tf.summary.FileWriter('./logs', sess.graph)
# sess = tf_debug.LocalCLIDebugWrapperSession(sess=sess)
epoch = 0
while s0 > 0:
# shuffle the data and select omega
        stopNum = (trainDataLen - window_size) // s0  # skip the first window_size points so every window is complete
NC = [window_size + i * s0 for i in range(stopNum)]
np.random.shuffle(NC)
# print(len(NC))
# print(stopNum)
for critic in range(stopNum // b0):
selectedOmega = NC[b0 * (critic):b0 * (critic + 1)]
# get input data
inputs = []
for i in range(b0):
for j in range(s0):
                    inputs.append(train_values[selectedOmega[i] + j - window_size + 1: selectedOmega[i] + j + 1])  # [bs, window_size]
# print(np.shape(inputs))
if critic % 4 == 3:
_, GenLoss, WL, GP, _KL, LZ, barX, LAM = sess.run(
[Gen_loss, Wloss, wgan_loss, gradientPenalty, negKL, logZ, Xhat, la], feed_dict={x: inputs})
                print('epoch: {} GenLoss: {} s0: {} b0: {}, wgan_loss: {}, GP: {}, -KL:{}, LZ:{}, LAM:{}'.format(
                    epoch, GenLoss, s0, b0, WL, GP, _KL, LZ, LAM))
else:
                _, DisLoss, WL, GP, barX, LAM = sess.run([Dis_loss, vloss, wgan_loss, gradientPenalty, Xhat, la],
                                                         feed_dict={x: inputs})
                # print('epoch: {} DisLoss: {} s0: {} b0: {}, wgan_loss: {}, GP: {}, LAM:{}'.format(
                #     epoch, DisLoss, s0, b0, WL, GP, LAM))
# print(barX)
epoch += 1
# print('epoch: {}'.format(epoch))
print('epoch: {} GenLoss: {} DisLoss: {} s0: {} b0: {}'.format(epoch, GenLoss, DisLoss, s0, b0))
if epoch % 40 == 0:
s0 = s0 // 2
b0 = b0 * 2
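    # --- Illustrative evaluation sketch (not part of the original script) ---
    # A minimal way to score test windows once training has finished, reusing the
    # trained session `sess`, the fixed-size placeholder `x` (its batch dimension was
    # fixed at graph-build time) and `Xhat` from getLoss(). The per-window mean squared
    # reconstruction error below is only a crude anomaly score chosen for illustration,
    # not the scoring procedure of the Buzz paper.
    batch_windows = int(x.shape[0])  # number of windows per feed, fixed when the placeholder was built
    test_windows = [test_values[i - window_size + 1: i + 1]
                    for i in range(window_size - 1, len(test_values))]
    scores = []
    for start in range(0, len(test_windows) - batch_windows + 1, batch_windows):
        batch = np.asarray(test_windows[start:start + batch_windows], dtype=np.float32)
        recon = sess.run(Xhat, feed_dict={x: batch})
        # higher reconstruction error -> more anomalous window
        scores.extend(np.mean((batch - recon) ** 2, axis=1))
    print('scored {} test windows'.format(len(scores)))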