博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
keras实现网络流量分类功能的BP神经网络
阅读量:4281 次
发布时间:2019-05-27

本文共 6700 字,大约阅读时间需要 22 分钟。

  1. 数据集选用KDD99

    数据下载地址:
    需求:

  2. 运行环境

    win10+keras
    安装步骤:

  3. 数据预处理

    包含数值替换文本、数值归一化、标签独热编码

# -*- coding: utf-8 -*-
"""Preprocess a KDD99-style CSV file for the neural-network classifier.

Text columns are replaced by integer codes, a fixed set of columns is
min-max normalised, and the label column is one-hot encoded.  Run as a
script, it reads ``data_test.csv`` and writes ``data_test_input.csv`` and
``data_test_target.csv``.

Created on Tue Nov  6 09:24:20 2018

@author: hrh
"""
import warnings

import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame

# Integer codes for the text values of column 1 (protocol names).
PROTOCOL_CODES = {'tcp': 0, 'udp': 1, 'icmp': 2}

# Integer codes for the text values of column 2 (service names).
SERVICE_CODES = {
    'aol': 0, 'auth': 1, 'bgp': 2, 'courier': 3, 'csnet_ns': 4,
    'ctf': 5, 'daytime': 6, 'discard': 7, 'domain': 8, 'domain_u': 9,
    'echo': 10, 'eco_i': 11, 'ecr_i': 12, 'efs': 13, 'exec': 14,
    'finger': 15, 'ftp': 16, 'ftp_data': 17, 'gopher': 18, 'harvest': 19,
    'hostnames': 20, 'http': 21, 'http_2784': 22, 'http_443': 23,
    'http_8001': 24, 'imap4': 25, 'IRC': 26, 'iso_tsap': 27, 'klogin': 28,
    'kshell': 29, 'ldap': 30, 'link': 31, 'login': 32, 'mtp': 33,
    'name': 34, 'netbios_dgm': 35, 'netbios_ns': 36, 'netbios_ssn': 37,
    'netstat': 38, 'nnsp': 39, 'nntp': 40, 'ntp_u': 41, 'other': 42,
    'pm_dump': 43, 'pop_2': 44, 'pop_3': 45, 'printer': 46, 'private': 47,
    'red_i': 48, 'remote_job': 49, 'rje': 50, 'shell': 51, 'smtp': 52,
    'sql_net': 53, 'ssh': 54, 'sunrpc': 55, 'supdup': 56, 'systat': 57,
    'telnet': 58, 'tftp_u': 59, 'tim_i': 60, 'time': 61, 'urh_i': 62,
    'urp_i': 63, 'uucp': 64, 'uucp_path': 65, 'vmnet': 66, 'whois': 67,
    'X11': 68, 'Z39_50': 69,
}

# Integer codes for the text values of column 3 (connection flags).
# NOTE(review): every flag except 'SF' maps to 0, which makes this a binary
# feature; presumably deliberate -- confirm before changing.
FLAG_CODES = {
    'OTH': 0, 'REJ': 0, 'RSTO': 0, 'RSTOS0': 0, 'RSTR': 0, 'S0': 0,
    'S1': 0, 'S2': 0, 'S3': 0, 'SF': 1, 'SH': 0,
}

# Integer class index for each label string found in column 41.
LABEL_CODES = {
    'normal.': 0, 'ipsweep.': 1, 'mscan.': 2, 'nmap.': 3, 'portsweep.': 4,
    'saint.': 5, 'satan.': 6, 'apache2.': 7, 'back.': 8, 'land.': 9,
    'mailbomb.': 10, 'neptune.': 11, 'pod.': 12, 'processtable.': 13,
    'smurf.': 14, 'teardrop.': 15, 'udpstorm.': 16, 'buffer_overflow.': 17,
    'httptunnel.': 18, 'loadmodule.': 19, 'perl.': 20, 'ps.': 21,
    'rootkit.': 22, 'sqlattack.': 23, 'xterm.': 24, 'ftp_write.': 25,
    'guess_passwd.': 26, 'imap.': 27, 'multihop.': 28, 'named.': 29,
    'phf.': 30, 'sendmail.': 31, 'snmpgetattack.': 32, 'snmpguess.': 33,
    'spy.': 34, 'warezclient.': 35, 'warezmaster.': 36, 'worm.': 37,
    'xlock.': 38, 'xsnoop.': 39,
}

# Columns rescaled to [0, 1]; column 2 is rescaled after being encoded.
NORMALIZED_COLUMNS = (2, 4, 5, 22, 23, 31, 32)

LABEL_COLUMN = 41
NUM_CLASSES = len(LABEL_CODES)  # 40 one-hot output columns


def _encode(column, mapping, column_index):
    """Replace text values by their integer codes.

    Values missing from ``mapping`` still become NaN (as ``Series.map``
    does), but a warning names them so the NaN does not silently propagate
    into the written CSV files.
    """
    encoded = column.map(mapping)
    unknown = column[encoded.isna() & column.notna()].unique()
    if len(unknown):
        warnings.warn(
            'column {0}: values without a code were set to NaN: {1}'.format(
                column_index, sorted(str(value) for value in unknown)))
    return encoded


def _min_max_scale(column):
    """Rescale a numeric column to [0, 1]; a constant column becomes 0.0."""
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        # Avoid 0/0 -> NaN when every value is identical.
        return (column - lowest).astype(float)
    return (column - lowest) / span


def get_total_data(path='data_test.csv'):
    """Load the CSV at ``path`` and return the encoded, normalised frame.

    The file is read without a header row, so columns are addressed by
    their integer position.  Columns 1, 2, 3 and 41 are converted from
    text to integer codes; the columns in ``NORMALIZED_COLUMNS`` are then
    min-max scaled.
    """
    data = pd.read_csv(path, header=None)

    encodings = (
        (1, PROTOCOL_CODES),
        (2, SERVICE_CODES),
        (3, FLAG_CODES),
        (LABEL_COLUMN, LABEL_CODES),
    )
    for column_index, mapping in encodings:
        data[column_index] = _encode(data[column_index], mapping, column_index)

    for column_index in NORMALIZED_COLUMNS:
        data[column_index] = _min_max_scale(data[column_index])

    return data


def get_target_data(data=None):
    """Return the labels as a one-hot DataFrame with ``NUM_CLASSES`` columns.

    ``data`` is a frame produced by ``get_total_data``; when omitted the
    default CSV file is loaded.

    Raises:
        ValueError: if the label column contains a value that has no entry
            in ``LABEL_CODES``.
    """
    if data is None:
        data = get_total_data()

    labels = data[LABEL_COLUMN]
    if labels.isna().any():
        raise ValueError(
            'column {0} contains labels that are missing from '
            'LABEL_CODES'.format(LABEL_COLUMN))

    # Row i of the identity matrix is the one-hot vector of class i.
    class_indices = labels.astype(int).values
    return DataFrame(np.eye(NUM_CLASSES)[class_indices])


def get_input_data(data=None):
    """Return the feature columns, i.e. everything except the label column.

    ``data`` is a frame produced by ``get_total_data`` (it is not
    modified); when omitted the default CSV file is loaded.
    """
    if data is None:
        data = get_total_data()
    return data.drop(LABEL_COLUMN, axis=1)


if __name__ == '__main__':
    # Load and encode the file once and derive both outputs from it.
    total_data = get_total_data()

    data_input = get_input_data(total_data)
    data_input.to_csv('data_test_input.csv', header=None, index=None)

    data_target = get_target_data(total_data)
    data_target.to_csv('data_test_target.csv', index=None, header=None)
  4. 代码
# -*- coding: utf-8 -*-
"""Train and evaluate a fully connected classifier on preprocessed CSV data.

Reads ``data_input.csv`` / ``data_target.csv`` for training and
``data_test_input.csv`` / ``data_test_target.csv`` for evaluation, fits a
41-54-40-40 dense network with SGD, then prints the test loss, the test
accuracy, the predicted class probabilities and the elapsed time.

Created on Mon Nov  5 16:34:42 2018

@author: hrh
"""
import time

# Started before the heavy imports below, so the elapsed time printed at the
# end also includes the time spent importing TensorFlow/Keras.
start = time.time()

from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
from keras.models import Sequential
from keras.layers.core import Dense, Activation
import pandas as pd
from keras.optimizers import SGD

if __name__ == '__main__':

    # All four files are read without a header row.
    input_data = pd.read_csv('data_input.csv', header=None)
    target_data = pd.read_csv('data_target.csv', header=None)
    input_data_test = pd.read_csv('data_test_input.csv', header=None)
    target_data_test = pd.read_csv('data_test_target.csv', header=None)

    # Sequential (layer-by-layer) model: 41 inputs -> 54 -> 40 -> 40 outputs.
    # `kernel_initializer` is the Keras 2 name of the legacy `init` argument.
    model = Sequential()
    model.add(Dense(54, input_dim=41, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(40, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(40, kernel_initializer='uniform', activation='softmax'))

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    # `epochs` is the Keras 2 name of the legacy `nb_epoch` argument.
    model.fit(input_data, target_data, epochs=12, batch_size=128)

    # Feed the test set into the trained model to check its error.
    # With metrics=['accuracy'] the result is [loss, accuracy].
    loss_and_metrics = model.evaluate(input_data_test, target_data_test, batch_size=128)
    print('Test loss:', loss_and_metrics[0])
    print('Test accuracy:', loss_and_metrics[1])

    result = model.predict(input_data_test, batch_size=128)
    print('测试集的预测结果为:', result)

    stop = time.time()
    print(str(stop-start) + "秒")
  5. 运行结果
Epoch 1/12
494021/494021 [==============================] - 5s 10us/step - loss: 0.1277 - acc: 0.9720
Epoch 2/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0327 - acc: 0.9946
Epoch 3/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0252 - acc: 0.9954
Epoch 4/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0215 - acc: 0.9963
Epoch 5/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0192 - acc: 0.9967
Epoch 6/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0177 - acc: 0.9971
Epoch 7/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0164 - acc: 0.9973
Epoch 8/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0155 - acc: 0.9976
Epoch 9/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0149 - acc: 0.9977
Epoch 10/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0142 - acc: 0.9978
Epoch 11/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0138 - acc: 0.9979
Epoch 12/12
494021/494021 [==============================] - 4s 9us/step - loss: 0.0134 - acc: 0.9979
311029/311029 [==============================] - 1s 4us/step
Test loss: nan
Test accuracy: 0.9157281153902965
测试集的预测结果为: [[9.9969363e-01 6.1610535e-07 7.2848221e-11 ... 7.0689246e-11
  1.6535544e-10 2.0521278e-10]
 [9.9969363e-01 6.1610535e-07 7.2848221e-11 ... 7.0689246e-11
  1.6535544e-10 2.0521278e-10]
 [9.9969363e-01 6.1610535e-07 7.2848221e-11 ... 7.0689246e-11
  1.6535544e-10 2.0521278e-10]
 ...
 [9.9966860e-01 6.4044093e-07 7.9545072e-11 ... 7.7094046e-11
  1.8038426e-10 2.2377748e-10]
 [9.9966013e-01 6.5163550e-07 7.8919801e-11 ... 7.6405368e-11
  1.7924477e-10 2.2227653e-10]
 [9.9966860e-01 6.4044093e-07 7.9545072e-11 ... 7.7094046e-11
  1.8038426e-10 2.2377748e-10]]
61.35442543029785秒

转载地址:http://kybgi.baihongyu.com/

你可能感兴趣的文章
SVN Ubuntu创建仓库
查看>>
spring与mybatis项目出现 java.lang.NumberFormatException: For input string: "${jdbc.ma
查看>>
Nginx学习笔记
查看>>
使用Squid搭建局域网视频缓存代理
查看>>
POI读取word2003和word2007
查看>>
使用Squid搭建内网视频缓存系统
查看>>
Tomcat 开启GZIP压缩网页
查看>>
malloc 详解
查看>>
如何在自定义Listener(监听器)中使用Spring容器管理的bean
查看>>
运维DBA规范(4大纪律9项注意)
查看>>
程序员简历修养
查看>>
ThreadPoolExecutor 判断多线程执行完成
查看>>
神经网络通俗指南:一文看懂神经网络工作原理
查看>>
Windows共享权限和相关管理
查看>>
页面制作HTML+CSS基础乱炖
查看>>
验证最小化可行产品(MVP)的15种方法
查看>>
JVM实用参数系列
查看>>
系统运维(灰度发布)
查看>>
几款强大的PPT制作辅助软件
查看>>
程序员应该常问常思考
查看>>