# Repository: mining-flow-detection (branch: master)
# File: detection.py

import os
import sys
import joblib
from sklearn.tree import DecisionTreeRegressor
import pandas as pd
import numpy as np


# Usage text, printed for the help flags and when required arguments are missing.
_HELP_TEXT = (
    'Help : python detection.py [pcapfile] [inputFolder] [outputFolder] \n'
    'The "pcapfile" is the name of the pcap file.\n'
    'The "inputFolder" is the path of your pcap file.\n'
    'The "outputFolder" is the path of the output file you want.'
)
_HELP_FLAGS = ('-h', '/h', '--help')

# Columns removed before the NaN/inf row filter is applied.
_ID_COLUMNS = ['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port',
               'Protocol', 'Timestamp', 'Label']

# Columns passed to both models, in this order.
_MODEL_FEATURES = ['Bwd Pkt Len Max', 'Pkt Len Max', 'PSH Flag Cnt', 'Bwd Pkt Len Min',
                   'Pkt Len Var', 'Bwd/Fwd len', 'Pkt Size Avg', 'Bwd Pkt Len Mean',
                   'Pkt Len Mean', 'Fwd Pkt Len Max', 'Fwd Pkt Len Mean', 'Flow Byts/s',
                   'Flow Pkts/s']

# Message suffix printed for each predicted class value; any other value prints nothing.
_MINING_MESSAGES = {
    1: '条流量是XMR挖掘流量',
    2: '条流量是BTC挖掘流量',
    3: '条流量是DCR挖掘流量',
}


def _run_flow_meter(input_dir, output_dir):
    """Run CICFlowMeter's cfm.bat from its bin directory.

    Returns False without running anything when the batch file is not found
    under the current working directory, True once the process has finished.
    The process exit status is not inspected (the original os.system call
    ignored it as well); the caller checks for the expected CSV instead.
    """
    import subprocess

    bin_dir = os.path.join('CICFlowMeter-4.0', 'bin')
    # Absolute path: the child's cwd is changed to bin_dir, so a relative
    # script path could be resolved against the wrong directory.
    script = os.path.abspath(os.path.join(bin_dir, 'cfm.bat'))
    if not os.path.isfile(script):
        return False
    # Argument list instead of a concatenated shell string, so folders that
    # contain spaces or shell metacharacters are passed through unchanged.
    subprocess.run([script, input_dir, output_dir], cwd=bin_dir)
    return True


def _build_feature_frame(flow_df):
    """Return the model input columns for every row without NaN/inf values.

    The returned frame keeps the row labels assigned when the CSV was read,
    so a row can still be traced back to its position in the file.
    """
    # Backward/forward mean packet length ratio, computed column-wise.
    # The +20 offset on both means is kept exactly as the script always
    # computed it (assumption: it matches how the models were trained -
    # TODO confirm).
    flow_df['Bwd/Fwd len'] = (
        (flow_df['Bwd Pkt Len Mean'] + 20) / (flow_df['Fwd Pkt Len Mean'] + 20)
    )
    flow_df = flow_df.drop(_ID_COLUMNS, axis=1)
    # Treat infinities as missing, then drop every row with a missing value.
    flow_df = flow_df.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
    return flow_df[_MODEL_FEATURES]


def _combine_predictions(cart_pred, id3_pred):
    """Merge both model outputs: 0 if either model says 0, else the CART value."""
    return [0 if (id3 == 0 or cart == 0) else cart
            for cart, id3 in zip(cart_pred, id3_pred)]


def main(argv=None):
    """Extract flows from a pcap with CICFlowMeter and report mining flows.

    Args:
        argv: full argument vector (program name first); defaults to sys.argv.
            Expected form: [prog, pcapfile, inputFolder, outputFolder].

    Returns:
        Process exit status: 0 on success or help, 2 on missing arguments,
        1 when cfm.bat or the generated flow CSV cannot be found.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) > 1 and argv[1] in _HELP_FLAGS:
        print(_HELP_TEXT)
        return 0
    if len(argv) < 4:
        # Previously this case died with an IndexError; show the usage instead.
        print(_HELP_TEXT, file=sys.stderr)
        return 2

    pcap_name, input_dir, output_dir = argv[1:4]

    if not _run_flow_meter(input_dir, output_dir):
        print('Error: CICFlowMeter-4.0/bin/cfm.bat was not found under the '
              'current directory.', file=sys.stderr)
        return 1

    # Path of the flow CSV, built the same way as before:
    # <outputFolder>/<pcapfile>_Flow.csv with forward slashes.
    csv_path = (output_dir + '\\' + pcap_name + '_Flow.csv').replace('\\', '/')
    if not os.path.isfile(csv_path):
        print('Error: expected flow file not found: ' + csv_path, file=sys.stderr)
        return 1

    # NOTE: joblib.load unpickles the file contents; only load model files
    # that come from a trusted source.
    model_cart = joblib.load('cart.dat')
    model_id3 = joblib.load('id3.dat')

    flows = pd.read_csv(csv_path, encoding='unicode_escape')
    X_test = _build_feature_frame(flows)
    if X_test.empty:
        # Nothing left to classify; the models cannot be called with zero rows.
        print('No usable flows found in ' + csv_path, file=sys.stderr)
        return 0

    verdicts = _combine_predictions(model_cart.predict(X_test),
                                    model_id3.predict(X_test))

    # Report with the row label from the CSV rather than the position after
    # filtering, so the number still identifies the flow when rows were dropped.
    for row_label, verdict in zip(X_test.index, verdicts):
        message = _MINING_MESSAGES.get(verdict)
        if message is not None:
            print('第', row_label, message)
    return 0


if __name__ == '__main__':
    sys.exit(main())