4 Star 1 Fork 0

Green/encrypted-traffic-analysis-2021

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
method_DL_template2_wxw.py 11.80 KB
一键复制 编辑 原始数据 按行查看 历史
wangxw 提交于 2021-05-12 14:40 . 完善细节
#!D:/Code/python
# -*- coding: utf-8 -*-
# @Time : 2021/5/7 0007 20:20
# @Author : xgf
# @File : method_DL_template.py
# @Software : PyCharm
import numpy
import torch
from torch import nn, Tensor, optim
from torch.autograd import Variable
import torch.nn.functional as F
from typing import (
TypeVar, Type, Union, Optional, Any,
List, Dict, Tuple, Callable, NamedTuple
)
import random
import time
import os
import copy
import re
import logging
from concurrent.futures import ThreadPoolExecutor
from concurrent import futures
import itertools
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from utils import Args, D, timeit
# from log import Log
# Experiment/method identifier (consumed by the commented-out Log helper below).
method_name = 'encrypted-traffic-analysis-2021'
# mylog = Log('../encrypted-traffic-analysis-2021_log', method_name)
def get_args() -> Args:
    """Assemble the experiment's hyper-parameter container.

    @return: an ``Args`` holding batch size, learning rate, epoch count,
             dataset directories and class/split settings.
    """
    base_dir = os.path.dirname(__file__)
    return Args([
        D("batchSize", int, 64),
        D("learningRate", float, 1e-2),
        D("numEpochs", int, 5000),
        D("rawDataDir", str, os.path.join(base_dir, "dataset/traffic")),
        D("dataDir", str, os.path.join(base_dir, "dataset/traindata")),
        D("saveDir", str, None),
        D("nClass", int, 50),
        D('splitData', int, 0.8),
    ])
# Per-feature maximum values used to scale every raw 67-dimensional feature
# vector into roughly [0, 1] by element-wise division (see readNpy below).
# NOTE(review): presumably derived from the defence_90 training split — confirm
# before reusing with a different dataset.
max_features = [22607, 0.9339419978517723, 1599, 0.06605800214822771, 24206, 6971.819376000754, 10492.093183239525,
6974.724357711386, 12216.404564957757, 10039.0, 12290.0, 2.0249652023687483, 1.3203963666391412, 1, 19, 1, 1,
22, 8, 23, 7, 672.3888888888889, 413.1607353196532, 13, 1781, 586.5, 0.24022639701446408,
0.0014869999230530883, 0.006851341989027328, 0.43530082699999895, 0.0015701625259172789, 0.008577127393046323,
0.6145144896628096, 0.0225155147029179, 0.03382628600025843, 1, 1, 1, 1, 1, 0.012953087945389186,
2.0627147460742803, 18.66804293971924, 20.0, 20, 3.272895675818782, 3.296907216494845, 3.0, 29,
3.7787047836052214, 46.61237113402062, 47.0, 50, 80, 20, 88, 12, 136, 31.187239944521497, 25.0,
26.882848597066, 721, 16, 2.906801007556675, 2.0, 1.8423725513133162, 397]
def readNpy(file_path):
    """Load an .npy sample file and normalise every sample's feature vector.

    @param file_path: path to a pickled .npy object array whose rows look like
                      ``[feature_vector, label, ...]``
    @return: the loaded array with each row's feature vector divided
             element-wise by ``max_features``
    """
    data = np.load(file_path, allow_pickle=True)
    raw = [row[0] for row in data]
    print(len(raw), len(raw[0]))
    # Scale all feature vectors at once by the per-feature maxima.
    scaled = np.array(raw) / np.array(
        max_features
    )
    # Write the normalised vectors back into the loaded rows in place.
    for i in range(len(data)):
        data[i][0] = scaled[i]
    return data
class AverageMeter(object):
    """Tracks the most recent value plus a running sum, count and mean.

    Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero out all tracked statistics."""
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
class EncryptTrafficDataset(Dataset):
    """Feature dataset of (feature vector, label) pairs.

    Expects ``traffic_data`` to be convertible to a 2-D array whose rows hold
    the feature vector in column 0 and the label in column 1 (extra columns
    are ignored).
    """

    def __init__(self, traffic_data, transform=None, target_transform=None):
        try:
            # BUG FIX: the 2-D slice raises IndexError (never IOError) when the
            # input is not at least 2-dimensional, and the slicing itself was
            # previously outside the try so the guard could never fire.
            traffic_data = np.array(traffic_data)[:, :2]
            self.traffic_features = traffic_data[:, 0]
            self.traffic_labels = traffic_data[:, 1]
        except IndexError:
            print("EncryptTrafficDataset初始化数据集失败,因为数据集传入错误")
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.traffic_labels)

    def __getitem__(self, idx):
        """Return the (feature tensor, label) pair at ``idx``.

        Optional ``transform`` / ``target_transform`` callables are applied
        to the feature vector / label before the feature is wrapped in a
        float Tensor.
        """
        feature = self.traffic_features[idx]
        label = self.traffic_labels[idx]
        if self.transform:
            feature = self.transform(feature)
        if self.target_transform:
            label = self.target_transform(label)
        # Removed the unused `sample` dict the original built and discarded.
        return (Tensor(feature), label)
class ThreeLinearNetwork(nn.Module):
    """A five-layer fully-connected classifier: 67 input features -> 50 classes.

    Emits raw (unnormalised) logits, as required by ``nn.CrossEntropyLoss``.
    """

    def __init__(self):
        super(ThreeLinearNetwork, self).__init__()
        # self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(67, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(512, 50),
            # BUG FIX: removed the trailing nn.ReLU(). CrossEntropyLoss expects
            # raw logits; a ReLU on the output zeroes every negative logit,
            # killing its gradient and hampering training.
        )

    def forward(self, x):
        """Return class logits of shape (batch, 50) for input of shape (batch, 67)."""
        return self.linear_relu_stack(x)
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: yields (features, labels) batches.
        model: network to optimise (already moved to its target device).
        loss_fn: criterion taking (logits, long-dtype labels).
        optimizer: optimiser over ``model.parameters()``.

    Returns:
        Mean per-batch loss as a float (the original returned None, which the
        caller immediately overwrote, so this is backward-compatible).
    """
    # BUG FIX: re-enable training mode; test() switches the model to eval()
    # and never switches back, which silently disabled Dropout for every
    # epoch after the first.
    model.train()
    # Derive the device from the model rather than relying on a global.
    device = next(model.parameters()).device
    total_loss = 0.0
    n_batches = 0
    for batch, (X, y) in enumerate(dataloader):
        # BUG FIX: Tensor.long() is not in-place — the original discarded it.
        X, y = X.to(device), y.long().to(device)
        # Compute prediction error.
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        n_batches += 1
    return total_loss / n_batches if n_batches else 0.0
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier

# Fit a companion random-forest classifier on the (normalised) training split;
# test() consults it alongside the MLP.
defence_90 = np.load("./feature_extraction/defence_90.npy", allow_pickle=True)
# defence_10 = np.load("./feature_extraction/defence_10.npy", allow_pickle=True)
features_train = [np.array(sample[0]) / max_features for sample in defence_90]
labels_train = [sample[1] for sample in defence_90]
rfc = RandomForestClassifier(n_estimators=100, n_jobs=-1, random_state=0).fit(features_train, labels_train)
def test(dataloader, model, epoch, name):
    """Evaluate ``model`` for one epoch and log accuracy to TensorBoard.

    Relies on module-level globals: ``device``, ``rfc`` (the fitted random
    forest), ``loss_fn`` and ``writer``.  For each batch a hybrid
    MLP/random-forest vote is computed into ``pred_list`` for diagnostics,
    but the returned accuracy uses the plain MLP argmax (the hybrid ``preds``
    line is commented out below).

    @param dataloader: evaluation batches of (features, labels)
    @param model: network to evaluate (left in eval mode on return)
    @param epoch: step index for the TensorBoard scalar
    @param name: tag prefix for the TensorBoard scalar
    @return: (average loss per sample, accuracy in [0, 1])
    """
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            y.long()  # NOTE(review): no-op — Tensor.long() is not in-place
            X, y = X.to(device), y.to(device)
            pred = model(X)
            # print(pred)
            #
            print(X.size())
            # Move predictions/labels to CPU for the scikit-learn forest.
            pred = pred.to('cpu')
            y = y.to('cpu')
            rfc_inputs = X.to('cpu').numpy()
            rfc_pred = rfc.predict(rfc_inputs)
            rfc_pred_prob = rfc.predict_proba(rfc_inputs)
            pred_list = []
            for idx, p in enumerate(rfc_pred):
                mlp_p = pred.argmax(1)[idx]
                label_p = y[idx]
                if p == mlp_p:
                    # Both classifiers agree: take the shared prediction.
                    pred_list.append(p)
                else:
                    # Disagreement: keep whichever classifier's winning score
                    # sits further above its own mean.  MLP outputs are
                    # sum-normalised (not softmaxed) as a rough probability
                    # proxy — TODO(review): confirm this is intended.
                    # mlp_probs = (pred[idx].numpy() - np.mean(pred[idx].numpy())) / np.std(pred[idx].numpy())
                    # mlp_probs = mlp_probs / mlp_probs.sum()
                    mlp_probs = pred[idx].numpy() / (pred[idx].numpy().sum() if pred[idx].numpy().sum()!=0 else 1)
                    rfc_probs = np.array(rfc_pred_prob[idx])
                    std_pro = 0
                    tmp = rfc_probs[p] / mlp_probs[p]
                    select = random.randint(0, 1)
                    # if select == 0:
                    if (rfc_probs[p] - np.mean(rfc_probs)) > (mlp_probs[mlp_p] - np.mean(mlp_probs)):
                        pred_list.append(p)
                    else:
                        pred_list.append(mlp_p)
                    # print(pred[idx])
                    print(f"mlp_prob: {mlp_probs[mlp_p]}:{mlp_probs[p]}, rfc_prob: {rfc_probs[p]} label: {label_p}, mlp: {mlp_p} ({pred[idx][mlp_p]}) {'对' if mlp_p == label_p else '错'}, rfc: {p} {'对' if p == label_p else '错'} {pred_list[-1]} {pred_list[-1] == label_p}")
            # print(rfc_pred)
            # for index,p in enumerate(rfc_pred):
            #     pred[index][p] += np.mean(pred[index].numpy())*3.5
            pred = pred.to(device)
            y = y.to(device)
            test_loss += loss_fn(pred, y).item()
            # preds = torch.tensor(pred_list).to(device)
            preds = pred.argmax(1).to(device)
            correct += (preds == y).type(torch.float).sum().item()
    # NOTE(review): loss is averaged over the sample count, not the batch
    # count, so its scale depends on the batch size.
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    writer.add_scalar(f'{name} acc', correct, epoch)
    return test_loss, correct
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(f"./tb_log/mlp_5layer")  # creates the log directory automatically
if __name__ == '__main__':
    # Prefer GPU when available; `device` is also read by train()/test().
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using {} device".format(device))
    args = get_args()
    batch_size = args.batchSize  # batch size
    learning_rate = args.learningRate  # learning rate
    num_epochs = args.numEpochs  # number of passes over the training set
    data_dir = args.dataDir
    save_dir = args.saveDir
    n_class = args.nClass
    split_data = args.splitData
    # # Load the data
    # # data = readNpy('./feature_extraction/undefence_features.npy')
    traindata = readNpy('./feature_extraction/defence_90.npy')
    testdata = readNpy('./feature_extraction/defence_10.npy')
    # traffic_data = EncryptTrafficDataset(data)
    # print(traffic_data[0])
    # # Split the data
    # train_size = int(split_data * len(traffic_data))
    # test_size = len(traffic_data) - train_size
    # train_data, test_data = torch.utils.data.random_split(traffic_data, [train_size, test_size])
    train_data, test_data = EncryptTrafficDataset(traindata), EncryptTrafficDataset(testdata)
    # Build the dataloaders
    train_dataloader = DataLoader(train_data, batch_size= batch_size, shuffle=True)
    train_features, train_labels = next(iter(train_dataloader))
    print(f"Feature batch shape: {train_features.size()}")
    print(f"Labels batch shape: {train_labels.size()}")
    test_dataloader = DataLoader(test_data, batch_size= batch_size, shuffle=True)
    model = ThreeLinearNetwork().to(device)
    model_path = "model_5layer_67fea_tensorborad.pth"
    # model = torch.load(model_path).to(device)
    print(model)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate)
    max_acc = 0
    for t in range(num_epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        train_loss = train(train_dataloader, model, loss_fn, optimizer)
        train_loss, train_test = test(train_dataloader, model, t+1, 'train')
        test_loss, test_acc = test(test_dataloader, model, t+1, 'test')
        # mylog.state_dict_update([('train_loss_list', train_loss),
        #                          ('train_acc_list', test_acc),
        #                          ('valid_loss_list', test_loss),
        #                          ('valid_acc_list', test_acc),
        #                          ])
        if t % 100 == 99:
            # Periodic checkpoint every 100 epochs.
            torch.save(model, model_path)
        print(f"当前最大:{max_acc}, test_acc:{test_acc}")
        if test_acc >= max_acc and test_acc >= 0.5:
            # Keep the best model so far, but only once accuracy passes 50%.
            print(test_acc)
            max_acc = test_acc
            torch.save(model, f"model_acc{int(test_acc*100)}_5layer_67fea_tensorboard.pth")
    # model = torch.load(model_path)
    # test_loss, test_acc = test(test_dataloader, model)
    print("Done!")
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/bitosky/encrypted-traffic-analysis-2021.git
git@gitee.com:bitosky/encrypted-traffic-analysis-2021.git
bitosky
encrypted-traffic-analysis-2021
encrypted-traffic-analysis-2021
master

搜索帮助