Commit 6576c085 authored by Ignacio Crespo

New way to process Redis flows

parent 39691002
......@@ -13,9 +13,20 @@ import time
import pytz
import argparse
HOST_logstash = ""
PORT_logstash = ""
data_logstash = ""
def split_data():
global data_logstash
new_data = data_logstash.split("\n",1)[1]
flow = data_logstash.split("\n",1)[0]
data_logstash = new_data
return flow
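# A minimal sketch of how split_data() drains the buffer (payloads are
# hypothetical), assuming each flow arrives as one newline-terminated JSON line:
#   data_logstash = '{"flow": 1}\n{"flow": 2}\n'
#   split_data()  # -> '{"flow": 1}', leaving '{"flow": 2}\n' in the buffer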
def parse_timestamp(date):
print date
#print date
date = datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S")
dtobj1 = date.replace(tzinfo=pytz.UTC)  # make the naive datetime timezone-aware (UTC)
......@@ -23,7 +34,7 @@ def parse_timestamp(date):
time_tuple = dtobj_madrid.timetuple()
timestamp = time.mktime(time_tuple)
print int(timestamp)
#print int(timestamp)
return int(timestamp)
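# Illustration (the exact epoch value depends on the host's timezone database):
# parse_timestamp("2020-01-15T10:30:00") parses the string as naive UTC,
# shifts it to Madrid local time (the collapsed hunk builds dtobj_madrid),
# and returns the integer Unix timestamp from time.mktime() on its timetuple.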
def check_attack(json_flow):
......@@ -37,48 +48,24 @@ def check_attack(json_flow):
predictions = moev_Manager.createModels()
return predictions
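# A minimal illustration of the call site; the model names and the
# "Normal traffic" label are hypothetical, while "Attack: scan port" is the
# label MoEv assigns further below:
#   predictions = check_attack(json.loads(test, object_pairs_hook=OrderedDict))
#   # -> e.g. {"SGD": "Attack: scan port", "QDA": "Normal traffic"}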
parser = argparse.ArgumentParser()
parser.add_argument("-r", "--redis", help="IP redis")
parser.add_argument("-l", "--logstash", help="IP logstash")
parser.add_argument("-p", "--port", help="Port logstash")
args = parser.parse_args()
HOST_redis = ""
HOST_logstash = ""
PORT_logstash = ""
if not args.redis or not args.logstash or not args.port:
print("It is neccesary to indicate IPs")
parser.print_help()
quit()
else:
HOST_redis = args.redis
HOST_logstash = args.logstash
PORT_logstash = args.port
r = redis.StrictRedis(host=HOST_redis, port=6379, db=0)
print(r)
# delete the key
netflow_redis = r.lindex("logstash", -1)
def process_flow():
data = split_data()
netflow = json.loads(netflow_redis)
#print(data)
netflow = json.loads(data)
#print(netflow)
print(netflow["netflow"]["in_bytes"])
#print(netflow["netflow"]["in_bytes"])
netflow_moev = collections.OrderedDict()
netflow_moev['#:unix_secs'] = parse_timestamp(netflow["@timestamp"].split('.')[0])
netflow_moev['unix_nsecs'] = 886670
netflow_moev['sysuptime'] = 69000
netflow_moev['exaddr'] = netflow["host"]["ip"]
netflow_moev['dpkts'] = netflow["netflow"]["in_pkts"]
netflow_moev['doctets'] = netflow["netflow"]["in_bytes"]
netflow_moev['first'] = 53440
netflow_moev['last'] = 53440
netflow_moev['engine_type'] = netflow["netflow"]["engine_type"]
netflow_moev['engine_id'] = netflow["netflow"]["engine_id"]
......@@ -120,7 +107,6 @@ else:
netflow.update(y)
print(json.dumps(netflow))
try:
......@@ -136,10 +122,162 @@ else:
sys.exit(2)
msg = netflow
print("Envio datos")
sock.send(json.dumps(netflow))
sock.close()
sys.exit(0)
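# Sketch of the record finally forwarded to logstash (field values are
# hypothetical): the incoming flow is updated with a Check_Attack dict that
# maps each model name to its verdict before json.dumps() hits the socket:
#   {"@timestamp": "...", "netflow": {...},
#    "Check_Attack": {"SGD": "Attack: scan port"}}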
parser = argparse.ArgumentParser()
parser.add_argument("-l", "--logstash", help="IP logstash")
parser.add_argument("-p", "--port", help="Port logstash")
args = parser.parse_args()
if not args.logstash or not args.port:
print("It is neccesary to indicate IPs")
parser.print_help()
quit()
else:
try:
HOST_logstash = args.logstash
PORT_logstash = args.port
hostname = socket.gethostname()
local_ip = socket.gethostbyname(hostname)
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Bind the socket to the port
server_address = ('0.0.0.0', 10000)
print('starting up on {} port {}'.format(*server_address))
sock.bind(server_address)
# Listen for incoming connections
sock.listen(1)
while True:
# Wait for a connection
print('waiting for a connection')
connection, client_address = sock.accept()
try:
print('connection from', client_address)
# Receive the data in small chunks and retransmit it
while True:
data = connection.recv(65536)
#print('received {!r}'.format(data))
data_logstash += data
process_flow()
#print("--------------------------------------------------------------------------------------------------")
if data:
#print('sending data back to the client')
connection.sendall("Connection")
else:
print('no data from', client_address)
break
finally:
# Clean up the connection
connection.close()
except:
print("An exception occurred")
'''
HOST_redis = args.redis
while True:
r = redis.StrictRedis(host=HOST_redis, port=6379, db=0)
print("size list redis")
print(r.llen("logstash"))
size_new_list = r.llen("logstash")
for i in range(0,r.llen("logstash")):
# delete the key
print(i)
netflow_redis = r.lindex("logstash", i)
netflow = json.loads(netflow_redis)
#print(netflow)
#print(netflow["netflow"]["in_bytes"])
netflow_moev = collections.OrderedDict()
netflow_moev['#:unix_secs'] = parse_timestamp(netflow["@timestamp"].split('.')[0])
netflow_moev['exaddr'] = netflow["host"]["ip"]
netflow_moev['dpkts'] = netflow["netflow"]["in_pkts"]
netflow_moev['doctets'] = netflow["netflow"]["in_bytes"]
netflow_moev['engine_type'] = netflow["netflow"]["engine_type"]
netflow_moev['engine_id'] = netflow["netflow"]["engine_id"]
netflow_moev['srcaddr'] = netflow["source"]["ip"]
netflow_moev['dstaddr'] = netflow["destination"]["ip"]
netflow_moev['nexthop'] = netflow["flow"]["next_hop"]
netflow_moev['input'] = netflow["flow"]["input_snmp"]
netflow_moev['output'] = netflow["flow"]["output_snmp"]
netflow_moev['srcport'] = netflow["source"]["port"]
netflow_moev['dstport'] = netflow["destination"]["port"]
netflow_moev['prot'] = netflow["network"]["iana_number"]
netflow_moev['tos'] = netflow["flow"]["tos"]
if "tcp_flags" in netflow:
with open("./tcp_flags.yml") as f:
file_flags= yaml.load(f, Loader=yaml.FullLoader)
tcp_flags = file_flags[('-'.join(netflow["flow"]["tcp_flags"]))]
f.close()
else:
tcp_flags = 0
netflow_moev['tcp_flags'] = tcp_flags
netflow_moev['src_mask'] = netflow["flow"]["src_mask_len"]
netflow_moev['dst_mask'] = netflow["flow"]["dst_mask_len"]
netflow_moev['src_as'] = netflow["netflow"]["src_as"]
netflow_moev['dst_as'] = netflow["netflow"]["dst_as"]
test = json.dumps(netflow_moev)
#print(json.dumps(netflow_moev))
#print(json.loads(test))
predictions = check_attack(json.loads(test,object_pairs_hook=OrderedDict))
y = {"Check_Attack":predictions}
netflow.update(y)
#print(json.dumps(netflow))
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
except socket.error as msg:
sys.stderr.write("[ERROR] %s\n" % msg[1])
sys.exit(1)
try:
sock.connect((HOST_logstash, int(PORT_logstash)))
except socket.error as msg:
sys.stderr.write("[ERROR] %s\n" % msg[1])
sys.exit(2)
msg = netflow
sock.send(json.dumps(netflow))
sock.close()
'''
......
......@@ -63,8 +63,8 @@ class MoEv:
self.df = pd.DataFrame([json])
logging.info("Dataset loaded")
print("json cargado")
print(self.df)
#print("json cargado")
#print(self.df)
if conf["cleanData"]["enabled"] or conf["analyzeData"]["enabled"]:
self.cleaner = Cleaner.Cleaner()
......@@ -294,7 +294,7 @@ class MoEv:
#We check whether it is intended to test models or train new ones
if conf["testSaveModels"]["enabled"]:
#print("Holaaaaaaaaaaaaaaaaaaaa")
print(conf["models_path"] + self.data_type + "/")
#print(conf["models_path"] + self.data_type + "/")
for base, dirs, files in os.walk(conf["models_path"] + self.data_type + "/"):
#print("Estoy aqui")
for name in files:
......@@ -303,7 +303,7 @@ class MoEv:
imported_model.modelName = name
prediction.update(self.testModels(imported_model))
print prediction
#print prediction
return prediction
else:
......@@ -368,10 +368,10 @@ class MoEv:
prediction[model.modelName.split('.')[0]] = "Attack: scan port"
print( '\n\n\t\t\t' + model.modelName + '\n\n\n' )
print("X=%s, Predicted=%s" % (self.X_test, predictions[0]))
#print( '\n\n\t\t\t' + model.modelName + '\n\n\n' )
#print("X=%s, Predicted=%s" % (self.X_test, predictions[0]))
print("##########################################################")
#print("##########################################################")
return prediction
......
File added
......@@ -18,6 +18,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import logging
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import itertools
from sklearn.metrics import accuracy_score
......@@ -35,3 +37,64 @@ def predict(classifier, X_test):
def accuracyScore(y_test, predictions):
return accuracy_score(y_test, predictions)
def learning_curves(X, y):
title = "Learning Curves (SGD)"
# Cross validation with 100 iterations to get smoother mean test and train
# score curves, each time with 20% data randomly selected as a validation set.
cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
estimator = QuadraticDiscriminantAnalysis()
train_sizes=np.linspace(.1, 1.0, 5)
ylim=(0.7, 1.01)
n_jobs=4
plt.figure()
plt.title(title)
plt.xlabel("Training examples")
plt.ylabel("Score")
train_sizes, train_scores, test_scores = learning_curve(
estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)
plt.grid()
plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
train_scores_mean + train_scores_std, alpha=0.1,
color="r")
plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
test_scores_mean + test_scores_std, alpha=0.1, color="g")
plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
label="Training score")
plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
label="Cross-validation score")
plt.legend(loc="best")
plt.show()
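# A quick smoke test for learning_curves() on synthetic data (the dataset
# shape is an assumption, not taken from the repository):
#
# from sklearn.datasets import make_classification
# X, y = make_classification(n_samples=500, n_features=10, random_state=0)
# learning_curves(X, y)  # renders the training vs. cross-validation curves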
def classificationReport(predictions, Y_test):
print("Classification report:\n\n%s \n" % classification_report(Y_test, predictions, digits=6))
def confusionMatrix (predictions, Y_test, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
font = {'family' : 'normal',
'weight' : 'normal',
'size' : 24}
# calculate confusion matrix
print("Confusion Matrix:\n")
cm = confusion_matrix(Y_test, predictions)
np.set_printoptions(precision=2)
print(cm)
def gridSearch(model, X, y, parameters):
#parameters: list of parameters in the configuration file
estimator = model
clf = GridSearchCV(estimator, parameters, cv=5)
print(clf.get_params())
clf.fit(X, y)
return clf  # hand the fitted GridSearchCV back to the caller
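# Example usage (the model and the parameter grid are assumptions, not read
# from the configuration file):
#
# from sklearn.svm import SVC
# best = gridSearch(SVC(), X_train, y_train, {"C": [0.1, 1, 10]})
# print(best.best_params_)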