from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras import models
import numpy as np
import time
import os
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report
import itertools
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#gpu_id = '0,1'
#os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
os.environ['CUDA_VISIBLE_DEVICES'] = "-1"
#os.system('echo $CUDA_VISIBLE_DEVICES')
tf_config = tf.compat.v1.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf.compat.v1.Session(config=tf_config)
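# CUDA_VISIBLE_DEVICES="-1" hides all GPUs, so TensorFlow runs CPU-only here.
# Note: ConfigProto/Session is TF1-style configuration; under TF2 eager
# execution, Keras model.fit does not use this session object.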
path = './'
font = {'family' : 'serif',
'color' : 'darkred',
'weight' : 'normal',
'size' : 14,
}
font_text = {'family' : 'serif',
'color' : 'darkred',
'weight' : 'normal',
'size' : 8,
}
# Class labels used when rendering the confusion matrix
matrix_list = ['non-VPN', 'VPN']
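# LeNet-5-style CNN for binary VPN / non-VPN traffic classification.
# Each input sample is one 1500x1500 single-channel "traffic image"; the model
# is trained, accuracy/loss curves are plotted, train/predict times are logged,
# and the predictions are handed to score() for evaluation.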
def LeNet5(train_x, test_x, train_y, test_y):
    model = models.Sequential()
    # With padding='SAME' and stride 5, the 1500x1500 input maps to 300x300
    model.add(layers.Conv2D(10, (10, 10), strides=(5, 5), padding='SAME', activation='relu',
                            input_shape=(1500, 1500, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(20, (10, 10), strides=(5, 5), padding='SAME', activation='relu'))
    model.add(layers.Dropout(0.25))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())  # flatten the feature maps into a 1-D vector
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(2, activation='softmax'))
    # model.add(layers.Dense(2, activation='sigmoid'))
    model.summary()  # print the network architecture
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])  # Adam defaults: α=0.001, β1=0.9, β2=0.999, ϵ=1e-8
#train_y = tf.squeeze(train_y)
#test_y = tf.squeeze(test_y)
#train_yy = tf.one_hot(train_y, depth=2)
#test_yy = tf.one_hot(test_y, depth=2)
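    # One-hot encode the 0/1 labels to shape (N, 2) for categorical_crossentropy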
train_yy = to_categorical(train_y,2)
test_yy = to_categorical(test_y,2)
# print(train_yy)
t1 = time.time()
history = model.fit(train_x,
train_yy,
epochs=4,
batch_size=4,
validation_data=(test_x, test_yy)
)
# test_loss, test_acc = model.evaluate(test_images, test_labels)
# print(test_loss)
time_train = time.time() - t1
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig(os.path.join(path,'total_CNN_accuracy_Epochs.eps'),dpi=300,format='eps')
plt.savefig(os.path.join(path,'total_CNN_accuracy_Epochs.png'))
plt.show()
    # Plot training and validation loss
plt.clf()
loss = history.history['loss']
val_loss = history.history['val_loss']
#epochs = range(1, len(loss) + 1)
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
#plt.savefig(os.path.join(path,'total_CNN_loss_Epochs.eps'),dpi=300,format='eps')
plt.savefig(os.path.join(path,'total_CNN_loss_Epochs.png'))
plt.show()
t1 = time.time()
    # out_pre = model.predict_classes(test_x)  # predict_classes was removed in TF 2.6+
    out_pre0 = model.predict(test_x)
    out_pre = np.argmax(out_pre0, axis=1)  # class with the highest softmax probability
time_test = time.time() - t1
    with open(os.path.join(path, 'CNNv1_time.txt'), 'w') as file:
        file.write('train_time:' + str(round(time_train, 2)))
        file.write('\ntest_time:' + str(round(time_test, 2)))
score(test_y,out_pre,history,'CNNv1')
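# Evaluate predictions: saves weighted precision/recall/F1 and the training
# history to CSV, plots a normalized confusion matrix, and writes a per-class
# classification report plus a per-sample prediction CSV.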
def score(test_y, out_pre, history, classification):  # classification: str naming the classifier
df_reindex = pd.read_csv(os.path.join(path, 'reindex.csv'), encoding='gb2312')
df_data = pd.DataFrame(history.history)
precision, recall,f1_score,_ = metrics.precision_recall_fscore_support(test_y, out_pre,average = 'weighted')
print('Precision:{:.4f}'.format(precision),'Recall:{:.4f}'.format(recall),'f1_score:{:.4f}'.format(f1_score))
    df_data.loc['score', df_data.columns[:3]] = [precision, recall, f1_score]
df_data.to_csv(os.path.join(path,classification+'_score.csv'))
my_confusion_matrix = metrics.confusion_matrix(test_y,out_pre)
plt.figure(figsize=(10, 8), dpi=120)
    plot_confusion_matrix(my_confusion_matrix, classes=matrix_list, normalize=True)
#plt.figure(figsize=(10, 8), dpi=120)
#plot_confusion_matrix(my_confusion_matrix, classes=list_y, normalize=True)
plt.savefig(os.path.join(path,classification+'_total_matrix.eps'),dpi=300,format='eps')
plt.savefig(os.path.join(path,classification+'_total_matrix.png'))
plt.show()
#print(out_class,len(out_class),len(y_train))
pre_df = pd.DataFrame(test_y,columns=['label'])
pre_df['pre'] = out_pre
pre_df['previousindex'] = df_reindex['previousindex']
# reindexdf = pd.DataFrame(reindex, columns=['previousindex'])
pre_df.to_csv(os.path.join(path,classification+'_predict.csv'),index=False)
    # Show the per-class results as a classification report
    cr = classification_report(test_y, out_pre, target_names=matrix_list, digits=4)
print(cr)
    # Parse the plain-text report into rows for CSV export (assumes the modern
    # sklearn layout whose last line is the 'weighted avg' summary)
    cr = cr.splitlines()
    res = [[''] + cr[0].split()]  # header row
    for row in cr[2:-2]:          # per-class rows; skip blank lines
        if row.split():
            res.append(row.split())
    lr = cr[-1].split()           # 'weighted avg' summary line
    res.append([' '.join(lr[:2])] + lr[2:])
    df = pd.DataFrame(res)
    df.to_csv(os.path.join(path, classification + '_report.csv'), index=False)
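# Inspect classification results: extract(real, pre, testdata) pulls the test
# rows whose true label is `real` and predicted label is `pre` (e.g. real=0,
# pre=1 collects non-VPN flows misclassified as VPN).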
def extract(real, pre, testdata):
    df_predict = pd.read_csv(os.path.join(path, 'CNNv1_predict.csv'), encoding='gb2312')
    df_testdata = pd.read_csv(os.path.join(path, testdata), encoding='gb2312')
    # Select rows whose true label is `real` and predicted label is `pre`
    # (DataFrame.append was removed in pandas 2.0, so use boolean indexing instead)
    mask = (df_predict['label'] == real) & (df_predict['pre'] == pre)
    extract_data0 = df_predict.loc[mask].reset_index(drop=True)
    extract_data0.to_csv(os.path.join(path, 'extract0(real' + str(real) + '-pre' + str(pre) + ').csv'),
                         index=False)  # index=False: do not write the index column
    # Map back to the original test rows via the saved previousindex column;
    # indexing df_testdata directly keeps its column order intact
    extract_data = df_testdata.loc[extract_data0.previousindex].reset_index(drop=True)
    extract_data.to_csv(os.path.join(path, 'extract(real' + str(real) + '-pre' + str(pre) + ').csv'), index=False)
'''
Render the confusion matrix as an image.
'''
def plot_confusion_matrix(cm, classes,
normalize=False,
cmap='Blues'):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=315, fontsize=10)
plt.yticks(tick_marks, classes, fontsize=10)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
if cm[i, j] > 0:
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
size=10,
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout(pad=1.5)
plt.ylabel('True label', fontdict=font)
plt.xlabel('Predicted label', fontdict=font)
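# Each .ev file is a flat stream of uint8 bytes: one sample is a 1500x1500
# single-channel image (1500 * 1500 = 2,250,000 bytes), hence the reshape
# below. Pixel values are scaled to [0, 1] before training.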
def main():
print('Loading data...')
# x = np.fromfile('train-app.ev',dtype=np.uint8)
x = np.fromfile('F:/RTM/train/ev/Compare-1229train(+background)-nolol.ev',dtype=np.uint8)
# x = np.fromfile('D:/CompareLabDATA/Part_TrainData/All-Split/Train-pcap(+background)/Compare/EV/EV0103/Compare-1229train(+background)-0103.ev',dtype=np.uint8)
print(int(x.shape[0] / 2250000))
x_train = x.reshape(int(x.shape[0] / 2250000), 1500, 1500, 1)
input_train = x_train.astype('float32') / 255
y_train = pd.read_csv('F:/RTM/train/csv/Compare-1229train(+background)-nolol.csv')
ytrain = y_train["label"].values
# print(y_train.index)
# y_train = pd.read_csv('D:/CompareLabDATA/Part_TrainData/All-Split/Train-pcap(+background)/Compare/CSV/CSV0103/Compare-1229train(+background)-0103.csv')
# train_labels = pd.read_csv('train-app.csv')
# print(y_train.shape)
# y_train = tf.reshape(y_train, [-1, 1])
# train_labels = to_categorical(y_train)
test_x = np.fromfile('F:/RTM/test/ev/Compare-1229train(+background)-nolol.ev', dtype=np.uint8)
# test_x = np.fromfile('D:/CompareLabDATA/Part_TrainData/All-Split/Test-pcap(+background)/Compare/EV/EV0103/Compare-1229test(+background)-0103.ev', dtype=np.uint8)
print(int(test_x.shape[0] / 2250000))
test_x = test_x.reshape(int(test_x.shape[0] / 2250000), 1500, 1500, 1)
test_x = test_x.astype('float32') / 255
test_y = pd.read_csv('F:/RTM/test/csv/Compare-1229train(+background)-nolol.csv')
testy = test_y["label"].values
# test_y = pd.read_csv('D:/CompareLabDATA/Part_TrainData/All-Split/Test-pcap(+background)/Compare/CSV/CSV0103/Compare-1229test(+background)-0103.csv')
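    # Save the original test-set row indices; score() reloads reindex.csv to map
    # predictions back to rows of the source CSV via the previousindex column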
indexdf = pd.DataFrame(test_y.index, columns=['previousindex'])
indexdf.to_csv('reindex.csv')
    # input_train, test_x, y_train, test_y = train_test_split(input_train, y_train, test_size=0.3, random_state=0)  # split the dataset
    LeNet5(input_train, test_x, ytrain, testy)
    # extract(0, 11, testdata)
    # train_x, test_x, train_y, test_y = train_test_split(input_train, y_train, test_size=0.3, random_state=0)  # shuffles the sample order
if __name__ =='__main__':
main()