1 Star 0 Fork 324

wangyun5114/cybersectookits

forked from openKylin/cybersectookits 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
TrafficClassificationDeepLearning.py 4.73 KB
一键复制 编辑 原始数据 按行查看 历史
import tensorflow.compat.v1 as tf
import numpy as np
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from sklearn import metrics
from tensorflow import keras
from sklearn.metrics import confusion_matrix, roc_curve
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import time
import pandas as pd
# Class labels; a sample's target value is the index of its label in this list.
list_y = ['WWW', 'MAIL', 'FTP-CONTROL', 'FTP-PASV', 'ATTACK', 'P2P',
          'DATABASE', 'FTP-DATA', 'MULTIMEDIA', 'SERVICES', 'INTERACTIVE', 'GAMES']


# Data preprocessing
def data_preprocess(filename, header_lines=253):
    """Parse ARFF-style trace files into feature vectors and integer labels.

    Each data line is a comma-separated record whose last field is the class
    label and whose other fields are features. Features may be numeric, the
    flags ``Y``/``N`` (mapped to 1/0) or ``?`` for a missing value.

    Args:
        filename: Iterable of file names. Each is joined onto the current
            working directory (an absolute path is used as-is).
        header_lines: Number of leading lines to skip in every file.

    Returns:
        A tuple ``(X, Y)``. ``X`` is a list of 1-D NumPy arrays, each holding
        the features (missing values replaced by 0), followed by 8 copies of
        the mean of the known features, with N(0, 1) noise added to every
        element. ``Y`` is a list of indices into ``list_y``.

    Raises:
        ValueError: If a feature is not numeric, a label is not in
            ``list_y``, or a record does not expand to 256 values.
    """
    mean_padding = 8   # copies of the row mean appended to each sample
    sample_width = 256  # length of the noise vector, i.e. of every sample

    X, Y = [], []
    base_dir = os.getcwd()
    for name in filename:
        print(name)
        with open(os.path.join(base_dir, name), 'r') as file:
            # Stream the file instead of materializing it with readlines().
            for line_no, raw in enumerate(file):
                if line_no < header_lines:
                    continue
                line = raw.strip()
                # Blank lines and ARFF comments carry no sample; without this
                # guard the mean below would divide by zero.
                if not line or line.startswith('%'):
                    continue

                # Split the label off first so the Y/N substitution below
                # cannot corrupt label text such as 'FTP-CONTROL'.
                *fields, label = [part.strip() for part in line.split(',')]
                # Map the Y / N flags to 1 / 0.
                fields = [f.replace('Y', '1').replace('N', '0') for f in fields]

                # Drop records with too many missing values.
                if fields.count('?') > 8:
                    continue

                parsed = [None if f == '?' else float(f) for f in fields]
                known = [v for v in parsed if v is not None]
                if not known:
                    # No numeric feature at all: the mean is undefined.
                    continue
                mean = sum(known) / len(known)

                # Zero-fill missing values, pad with the mean and add Gaussian
                # white noise so the sample suits the network input.
                filled = [0.0 if v is None else v for v in parsed]
                x = (np.asarray(filled + [mean] * mean_padding, dtype=float)
                     + np.random.normal(0, 1, sample_width))

                X.append(x)
                Y.append(list_y.index(label))
    return X, Y
# Dataset preparation: load, split, convert to tensors, normalize.
# The file order below is deliberate: it fixes the sample order and therefore
# the outcome of the seeded shuffle in train_test_split.
total_x, total_y = data_preprocess(
    ['entry%02d.weka.allclass.arff' % k for k in (1, 2, 3, 4, 5, 9, 10, 7, 8, 6)]
)

# Hold out 25% of the samples for testing (test:train = 1:3).
train_x, test_x, train_y, test_y = train_test_split(
    total_x, total_y, random_state=0, test_size=0.25
)

# Labels become int64 tensors.
train_y = tf.convert_to_tensor(train_y, dtype=tf.int64)
test_y = tf.convert_to_tensor(test_y, dtype=tf.int64)

# Features become float64 tensors and are then normalized along axis 1
# (i.e. per sample) with tf.keras.utils.normalize.
train_x = tf.keras.utils.normalize(
    tf.convert_to_tensor(train_x, dtype=tf.float64), axis=1
)
test_x = tf.keras.utils.normalize(
    tf.convert_to_tensor(test_x, dtype=tf.float64), axis=1
)
# Plot the confusion matrix
def plot_confusion_matrix(title, pred_y):
    """Show a row-normalized confusion matrix for the module-level test set.

    Args:
        title: Title drawn above the figure.
        pred_y: Per-class scores for every test sample; the predicted class
            is the argmax along axis 1.

    The true labels come from the module-level ``test_y`` and the axis tick
    labels from ``list_y``.
    """
    labels_name = list_y
    # Pin the label set so the matrix always has one row/column per class,
    # even when a class is absent from the test split or the predictions;
    # otherwise the tick labels below would not line up with the matrix.
    label_ids = list(range(len(labels_name)))
    cm = confusion_matrix(test_y, np.argmax(pred_y, 1), labels=label_ids)

    # Normalize each row by its total; rows with no true samples stay at 0
    # instead of producing NaN from a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm = np.divide(cm.astype('float'), row_totals,
                   out=np.zeros(cm.shape, dtype=float),
                   where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest')  # render the matrix as an image
    plt.title(title)  # figure title
    plt.colorbar()
    num_local = np.array(range(len(labels_name)))
    plt.xticks(num_local, labels_name, rotation=90)  # class names on the x axis
    plt.yticks(num_local, labels_name)  # class names on the y axis
    plt.ylabel('True')
    plt.xlabel('Predicted')
    plt.show()
# Number of output classes: predictions fall into one of 12 categories.
num_classes = 12
# Dimensionality of each input sample.
num_pixels = 256
def DenseBlock():
    """Build, train and evaluate a two-layer dense classifier.

    Uses the module-level ``train_x``/``train_y`` for training (20% of them
    held back for validation) and ``test_x``/``test_y`` for evaluation.
    Prints the test accuracy together with the elapsed wall-clock time and
    shows the confusion matrix.

    Returns:
        The trained Keras ``Sequential`` model.
    """
    t1 = time.time()
    # Build the network: one hidden ReLU layer and a softmax output layer.
    model = Sequential()
    model.add(layers.Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    # Print the network structure.
    model.summary()
    # Compile; the sparse loss takes integer class ids as targets.
    model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    # Reshape the samples to (n_samples, num_pixels). The width comes from the
    # same constant as the input layer so the two cannot drift apart.
    X_train = tf.reshape(train_x, [-1, num_pixels])
    X_test = tf.reshape(test_x, [-1, num_pixels])
    # Train the model.
    model.fit(X_train, train_y, validation_split=0.2, epochs=20, batch_size=128, verbose=2)
    # Evaluate; with metrics=['accuracy'] the result is [loss, accuracy].
    scores = model.evaluate(X_test, test_y, verbose=0)
    # Per-class scores for every test sample.
    predict_y = model.predict(X_test)
    t2 = time.time()
    # Report the accuracy and the elapsed time in seconds.
    print("Accuracy: %.2f%%" % (scores[1] * 100), t2 - t1)
    # Show the confusion matrix.
    plot_confusion_matrix("DenseBlock Confusion Matrix", predict_y)
    # Hand the trained model back to the caller.
    return model


clf = DenseBlock()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/wangyun5114/cybersectookits.git
git@gitee.com:wangyun5114/cybersectookits.git
wangyun5114
cybersectookits
cybersectookits
master

搜索帮助