# experiment_-prox-spider_-hd/Experiment _1.py

# I: imports
from sklearn.datasets import load_svmlight_file
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
import numpy as np
import matplotlib.pyplot as plt
import json_func

from SGD import ProxSGD
from SVRG import SVRG_nonsmooth
from VM_SVRG import VM_SVRG_nonsmooth
from ASVRG import ASVRG_nonsmooth
from RSAG import RSAG_nonsmooth
from Katyusha_ns import Katyusha_ns
from SPIDER import SpiderMED_nonsmooth
from SpiderBoost import SpdierBoost_nonsmooth
from SS_ProxHD import SS_ProxHD , SS_ProxHDM

from loss import logistic_loss_nonconvex_nonsmooth, logistic_loss_nonconvex_gradient, Robust_linear_regression_loss_nonsmooth, Robust_linear_regression_gradient
# Compare the performance of SSPHD with other algorithms

# II: configuration and data loading
# dataset_ = ['a9a', 'ijcnn1', 'w8a', 'rcv1_train.binary', 'covtype.libsvm.binary', 'gisette_scale']
# dataset_ = ['a9a', 'ijcnn1', 'w8a', 'covtype.libsvm.binary', 'gisette_scale']
dataset_ = ['ijcnn1']
# dataset_ = ['a9a']

save_file = True
plot_fig = True
alg = "logistic"
# alg = "linear"

# Solver switches: set a flag to True to include that solver in the comparison.
flag_ProxSGD = True
flag_ProxSVRG = False
flag_VM_SVRG = True
flag_Katyusha_ns = False
flag_ASVRG = False
flag_RSAG = True

flag_ProxSpiderBoost = False
flag_SpiderMED = False

flag_SS_ProxHD = True
flag_SS_ProxHDM = True

# Per-dataset settings; each dataset is read from data/<name>.
#   negative_label     : raw label value that is remapped to 0
#   max_iterations     : iteration budget for every solver except ProxSGD / RSAG
#   sgd_max_iterations : iteration budget used for ProxSGD and RSAG
#   l1_weight          : forwarded to the solvers as opt['l1_weight']
#   batch_size         : forwarded to the solvers as opt['batch_size']
#   im_fraction        : im = int(im_fraction * n) is passed to VM_SVRG_nonsmooth
DATASET_SETTINGS = {
    'a9a': {
        'negative_label': -1, 'max_iterations': 2400, 'sgd_max_iterations': 2800,
        'l1_weight': 0.01, 'batch_size': 256, 'im_fraction': 0.1,
    },
    'w8a': {
        'negative_label': -1, 'max_iterations': 1600, 'sgd_max_iterations': 2400,
        'l1_weight': 0.01, 'batch_size': 256, 'im_fraction': 0.05,
    },
    'ijcnn1': {
        'negative_label': -1, 'max_iterations': 3600, 'sgd_max_iterations': 4500,
        'l1_weight': 0.001, 'batch_size': 512, 'im_fraction': 0.15,
    },
    'covtype.libsvm.binary': {
        'negative_label': 2, 'max_iterations': 1600, 'sgd_max_iterations': 2400,
        'l1_weight': 0.00001, 'batch_size': 728, 'im_fraction': 0.01,
    },
    'gisette_scale': {
        'negative_label': -1, 'max_iterations': 1600, 'sgd_max_iterations': 2400,
        'l1_weight': 0.1, 'batch_size': 256, 'im_fraction': 0.2,
    },
    'rcv1_train.binary': {
        'negative_label': -1, 'max_iterations': 1000, 'sgd_max_iterations': 1500,
        'l1_weight': 0.01, 'batch_size': 512, 'im_fraction': 0.25,
    },
}

# III: loss function -- (objective, gradient) pair selected by `alg`.
OBJECTIVES = {
    "logistic": (logistic_loss_nonconvex_nonsmooth, logistic_loss_nonconvex_gradient),
    "linear": (Robust_linear_regression_loss_nonsmooth, Robust_linear_regression_gradient),
}

# Per-curve plot styling, indexed by the position of the solver in the legend.
COLORS = ['#1B2631', '#C0392B', '#9B59B6', 'Maroon', '#1E8449', '#0343DF', '#E67E22', '#95A5A6', '#FF97F2',
          '#34495E']
# MARKERS = ['s', '8', '>', '<', 'P', '*', 'd', 'X']
MARKERS = [None, None, None, None, None, None, None, None, '*', 'd', 'X']
LINESTYLES = ['-.', '-', '-', '-.', '-', '-', '-', '-', '-', '-', '-', '-', '-']


def _plot_curves(x_series, y_series, labels, title, ylabel):
    """Draw one log-scale figure with a curve per solver and show it.

    x_series / y_series are parallel lists (one entry per solver) and
    `labels` supplies the legend text in the same order. The figure is
    returned so that a caller can save it after it has been shown.
    """
    fig = plt.figure()
    plt.yscale('log')
    for i, (xs, ys) in enumerate(zip(x_series, y_series)):
        # Styles repeat after len(COLORS) curves; the other two lists are longer.
        style = i % len(COLORS)
        plt.plot(xs, ys, LINESTYLES[style], marker=MARKERS[style], color=COLORS[style], linewidth=2,
                 markersize=12)
    plt.legend(labels, fontsize=11, loc='best')
    plt.title(title)
    plt.ylabel(ylabel, fontsize=18)
    plt.xlabel("number of epochs", fontsize=18)
    plt.show()
    return fig


# Fail before any expensive work instead of hitting a NameError (or silently
# reusing the previous iteration's data) halfway through the run.
if alg not in OBJECTIVES:
    raise ValueError(f"unknown alg {alg!r}; expected one of {sorted(OBJECTIVES)}")
unknown_datasets = [name for name in dataset_ if name not in DATASET_SETTINGS]
if unknown_datasets:
    raise ValueError(f"unknown dataset(s) {unknown_datasets}; expected one of {sorted(DATASET_SETTINGS)}")

loss_function, gradient = OBJECTIVES[alg]

for dataset in dataset_:
    settings = DATASET_SETTINGS[dataset]
    # Only referenced by the commented-out savefig calls at the bottom.
    experiment_name = 'test_1_logistic_nonconvex_nonsmooth' + '_' + dataset
    opt = {}

    X, Y = load_svmlight_file('data/' + dataset)
    n = X.shape[0]
    d = X.shape[1]
    # Remap the dataset's "negative" label to 0; every other label is kept.
    Y = np.where(Y == settings['negative_label'], 0, Y)
    max_iterations = settings['max_iterations']
    SGD_max_iterations = settings['sgd_max_iterations']
    opt['l1_weight'] = settings['l1_weight']
    opt["batch_size"] = settings['batch_size']
    opt["tol_grad"] = -1  # NOTE(review): presumably disables a gradient-tolerance stop -- confirm in the solvers
    im = int(settings['im_fraction'] * n)

    # Alternative scaling (disabled): MinMaxScaler().fit_transform(X.toarray())
    # Normalizer is stateless, so a single dense conversion + fit_transform
    # gives the same result as fitting and transforming two separate copies.
    X = Normalizer().fit_transform(X.toarray())
    w = np.ones(d) * 2
    # w = np.random.normal(0, 1, d)

    # IV: algorithms
    opt["alpha"] = 0.1  # nonconvex regularizer
    opt["tau"] = 1e-10  # small term for log function in case of log(0)
    # opt["inner_loop_m"] = int(2*n/opt["batch_size"])
    opt["inner_loop_m"] = int(n / opt["batch_size"])
    opt["eta"] = 0.05   # step_size
    opt["nonsmooth"] = True
    opt['momentum'] = 10

    opt['max_iterations'] = max_iterations

    # (enabled, legend label, solver, extra positional args, iteration budget)
    # The order fixes both the legend order and the colour of each curve.
    solvers = [
        (flag_ProxSGD, 'SGD', ProxSGD, (), SGD_max_iterations),
        (flag_ProxSVRG, 'SVRG', SVRG_nonsmooth, (), max_iterations),
        (flag_VM_SVRG, 'VM SVRG', VM_SVRG_nonsmooth, (im,), max_iterations),
        (flag_Katyusha_ns, 'Katyusha_ns', Katyusha_ns, (), max_iterations),
        (flag_ASVRG, 'ASVRG', ASVRG_nonsmooth, (), max_iterations),
        (flag_RSAG, 'RSAG', RSAG_nonsmooth, (), SGD_max_iterations),
        (flag_ProxSpiderBoost, 'SpiderBoost', SpdierBoost_nonsmooth, (), max_iterations),
        (flag_SpiderMED, 'SpiderBoost-M', SpiderMED_nonsmooth, (), max_iterations),
        (flag_SS_ProxHD, 'SS_ProxHD', SS_ProxHD, (), max_iterations),
        # SS_ProxHDM: the variant with a momentum term.
        (flag_SS_ProxHDM, 'SS_ProxHDM', SS_ProxHDM, (), max_iterations),
    ]

    # Collected results, one entry per solver that actually ran.
    loss = []
    grads = []
    samples = []
    times = []
    list_params = []

    for enabled, label, solver, extra_args, budget in solvers:
        if not enabled:
            continue
        # Per-call options copy: `opt` itself keeps the default budget, so no
        # set-and-restore of opt['max_iterations'] is needed around each call.
        run_opt = dict(opt, max_iterations=budget)
        # Each solver gets its own copy of the start point, so an in-place
        # update inside one solver cannot shift the start of the next one.
        (_loss, _grad, _sample, _time) = solver(w.copy(), X, Y, loss_function, gradient, *extra_args, **run_opt)
        list_params.append(label)
        loss.append(_loss)
        grads.append(_grad)
        # Divide by n so the x-axis is in units of the dataset size
        # (plotted below as "number of epochs").
        samples.append([e / n for e in _sample])
        times.append(_time)

    # save data in json
    if save_file:
        file_path = "result/test_1/json/"
        for label, run_loss, run_grads, run_samples, run_times in zip(list_params, loss, grads, samples, times):
            file_name = f'{alg}_{dataset}_{label}_eta{opt["eta"]}_m{opt["inner_loop_m"]}_b{opt["batch_size"]}_L{opt["l1_weight"]}.json'
            json_data = {
                'loss': run_loss,
                'grads': run_grads,
                'samples': run_samples,
                'times': run_times,
                'name': label,
            }
            json_func.save_json(file_path, file_name, json_data)

    # plot (skipped when no solver ran: there would be nothing to draw and
    # no loss value to use as the reference minimum)
    if plot_fig and list_params:
        fig = _plot_curves(samples, grads, list_params, dataset, "grad (log)")
        #fig.savefig(f'result/test_1/{experiment_name}_Grad_eta{opt["eta"]}_m{opt["inner_loop_m"]}_b{opt["batch_size"]}_L{opt["l1_weight"]}.pdf', format='pdf', dpi=1000)
        # fig.savefig(f'result/test_1/{experiment_name}_Grad_eta{opt["eta"]}_m{opt["inner_loop_m"]}_b{opt["batch_size"]}_L{opt["l1_weight"]}.png', bbox_inches='tight', dpi=1000)

        # plot loss
        # The smallest loss seen by any solver stands in for f^*; subtracting
        # small_term keeps every shifted value strictly positive for the log axis.
        small_term = 1e-10
        min_loss = min(e for run in loss for e in run) - small_term
        # min_loss = 0
        # Shift into a separate list so the recorded `loss` values stay intact.
        shifted_loss = [[e - min_loss for e in run] for run in loss]
        fig = _plot_curves(samples, shifted_loss, list_params, dataset, "$f-f^*$(log)")
        #fig.savefig(f'result/test_1/{experiment_name}_Loss_eta{opt["eta"]}_m{opt["inner_loop_m"]}_b{opt["batch_size"]}_L{opt["l1_weight"]}.pdf', format='pdf', dpi=1000)
        # fig.savefig(f'result/test_1/{experiment_name}_Loss_eta{opt["eta"]}_m{opt["inner_loop_m"]}_b{opt["batch_size"]}_L{opt["l1_weight"]}.png', bbox_inches='tight', dpi=1000)