Introducing neural stuff

This commit is contained in:
BlackLight 2010-10-21 17:36:47 +02:00
parent af14a6b826
commit a15e1991e4
10 changed files with 321 additions and 27 deletions

View file

@ -4,7 +4,7 @@ AUTOMAKE_OPTIONS=foreign no-dependencies
libdir = ${exec_prefix}/lib/snort_dynamicpreprocessor
lib_LTLIBRARIES = libsf_ai_preproc.la
libsf_ai_preproc_la_CFLAGS = -I./uthash -I./base64 -I./include ${LIBXML2_INCLUDES} ${LIBGRAPH_INCLUDES} -DDYNAMIC_PLUGIN -D_XOPEN_SOURCE -D_GNU_SOURCE -fvisibility=hidden -fno-strict-aliasing -Wall -pedantic -pedantic-errors -std=c99 -fstack-protector
libsf_ai_preproc_la_CFLAGS = -I./uthash -I./base64 -I./fsom -I./include ${LIBXML2_INCLUDES} ${LIBGRAPH_INCLUDES} -DDYNAMIC_PLUGIN -D_XOPEN_SOURCE -D_GNU_SOURCE -fvisibility=hidden -fno-strict-aliasing -Wall -pedantic -pedantic-errors -std=c99 -fstack-protector
libsf_ai_preproc_la_LDFLAGS = -module -export-dynamic
BUILT_SOURCES = \
@ -27,6 +27,7 @@ correlation.c \
db.c \
fsom/fsom.c \
mysql.c \
neural.c \
outdb.c \
postgresql.c \
regex.c \

View file

@ -85,9 +85,10 @@ am_libsf_ai_preproc_la_OBJECTS = libsf_ai_preproc_la-alert_history.lo \
libsf_ai_preproc_la-cluster.lo \
libsf_ai_preproc_la-correlation.lo libsf_ai_preproc_la-db.lo \
libsf_ai_preproc_la-fsom.lo libsf_ai_preproc_la-mysql.lo \
libsf_ai_preproc_la-outdb.lo libsf_ai_preproc_la-postgresql.lo \
libsf_ai_preproc_la-regex.lo libsf_ai_preproc_la-spp_ai.lo \
libsf_ai_preproc_la-stream.lo libsf_ai_preproc_la-webserv.lo
libsf_ai_preproc_la-neural.lo libsf_ai_preproc_la-outdb.lo \
libsf_ai_preproc_la-postgresql.lo libsf_ai_preproc_la-regex.lo \
libsf_ai_preproc_la-spp_ai.lo libsf_ai_preproc_la-stream.lo \
libsf_ai_preproc_la-webserv.lo
nodist_libsf_ai_preproc_la_OBJECTS = \
libsf_ai_preproc_la-sf_dynamic_preproc_lib.lo \
libsf_ai_preproc_la-sfPolicyUserData.lo
@ -246,7 +247,7 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
AUTOMAKE_OPTIONS = foreign no-dependencies
lib_LTLIBRARIES = libsf_ai_preproc.la
libsf_ai_preproc_la_CFLAGS = -I./uthash -I./base64 -I./include ${LIBXML2_INCLUDES} ${LIBGRAPH_INCLUDES} -DDYNAMIC_PLUGIN -D_XOPEN_SOURCE -D_GNU_SOURCE -fvisibility=hidden -fno-strict-aliasing -Wall -pedantic -pedantic-errors -std=c99 -fstack-protector
libsf_ai_preproc_la_CFLAGS = -I./uthash -I./base64 -I./fsom -I./include ${LIBXML2_INCLUDES} ${LIBGRAPH_INCLUDES} -DDYNAMIC_PLUGIN -D_XOPEN_SOURCE -D_GNU_SOURCE -fvisibility=hidden -fno-strict-aliasing -Wall -pedantic -pedantic-errors -std=c99 -fstack-protector
libsf_ai_preproc_la_LDFLAGS = -module -export-dynamic
BUILT_SOURCES = \
include/sf_dynamic_preproc_lib.c \
@ -268,6 +269,7 @@ correlation.c \
db.c \
fsom/fsom.c \
mysql.c \
neural.c \
outdb.c \
postgresql.c \
regex.c \
@ -419,6 +421,9 @@ libsf_ai_preproc_la-fsom.lo: fsom/fsom.c
libsf_ai_preproc_la-mysql.lo: mysql.c
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-mysql.lo `test -f 'mysql.c' || echo '$(srcdir)/'`mysql.c
libsf_ai_preproc_la-neural.lo: neural.c
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-neural.lo `test -f 'neural.c' || echo '$(srcdir)/'`neural.c
libsf_ai_preproc_la-outdb.lo: outdb.c
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-outdb.lo `test -f 'outdb.c' || echo '$(srcdir)/'`outdb.c

1
TODO
View file

@ -2,6 +2,7 @@
AVERAGE/HIGH PRIORITY:
======================
- Neural network for alert correlation
- Modules for correlation coefficients
- Code profiling
- Comment all the code!!!

5
db.h
View file

@ -30,7 +30,7 @@
#define DB_init mysql_do_init
#define DB_is_init mysql_is_init
#define DB_query mysql_do_query
#define DB_num_rows mysql_num_rows
#define DB_num_rows mysql_do_num_rows
#define DB_fetch_row mysql_fetch_row
#define DB_free_result mysql_free_result
#define DB_escape_string mysql_do_escape_string
@ -39,11 +39,14 @@
#define DB_out_init mysql_do_out_init
#define DB_is_out_init mysql_is_out_init
#define DB_out_query mysql_do_out_query
#define DB_out_num_rows mysql_do_out_num_rows
#define DB_out_escape_string mysql_do_out_escape_string
#define DB_out_close mysql_do_out_close
DB_result* DB_query ( const char* );
DB_result* DB_out_query ( const char* );
unsigned long DB_num_rows();
unsigned long DB_out_num_rows();
#endif
#ifdef HAVE_LIBPQ

View file

@ -60,7 +60,7 @@ void som_set_inputs ( som_network_t*, double* );
void som_train ( som_network_t*, double**, size_t, size_t );
void som_serialize ( som_network_t*, const char* );
double som_get_best_neuron_coordinates ( som_network_t*, size_t*, size_t* );
som_network_t* som_deserialize ( const char* fname );
som_network_t* som_deserialize ( const char* );
som_network_t* som_network_new ( size_t, size_t, size_t );
#endif

12
mysql.c
View file

@ -156,6 +156,18 @@ mysql_do_out_escape_string ( char **to, const char *from, unsigned long length )
return mysql_real_escape_string ( outdb, *to, from, length );
}
/** \brief Number of rows returned by the latest query on the log database handle
 * \return Row count of the current result set on the `db` connection */
unsigned long
mysql_do_num_rows ()
{
	unsigned long n_rows;

	n_rows = mysql_num_rows ( db );
	return n_rows;
}
/** \brief Number of rows returned by the latest query on the output database handle
 *
 * Renamed from a duplicated `mysql_do_num_rows`: defining the same function
 * name twice in one translation unit is a compile error, this body reads the
 * `outdb` connection, and db.h maps the `DB_out_num_rows` macro to
 * `mysql_do_out_num_rows` — so this is the name callers actually link against.
 * \return Row count of the current result set on the `outdb` connection */
unsigned long
mysql_do_out_num_rows ()
{
return mysql_num_rows ( outdb );
}
void
mysql_do_out_close ()
{

185
neural.c Normal file
View file

@ -0,0 +1,185 @@
/*
* =====================================================================================
*
* Filename: neural.c
*
* Description: Manage the alert correlation based on SOM neural network
*
* Version: 0.1
* Created: 21/10/2010 08:51:28
* Revision: none
* Compiler: gcc
*
* Author: BlackLight (http://0x00.ath.cx), <blacklight@autistici.org>
* Licence: GNU GPL v.3
* Company: DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
*
* =====================================================================================
*/
#include "spp_ai.h"
/** \defgroup neural Module for the neural network-based alert correlation
* @{ */
#ifdef HAVE_DB
#include "db.h"
#include "fsom.h"
#include <alloca.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/stat.h>
#include <time.h>
#include <unistd.h>
/* Indices of the per-alert features in the SOM input vector; SOM_NUM_ITEMS is the vector width */
enum { som_src_ip, som_dst_ip, som_src_port, som_dst_port, som_time, som_alert_id, SOM_NUM_ITEMS };
/* Timestamp of the most recent serialization of the network to disk ((time_t) 0 = never) */
PRIVATE time_t latest_serialization_time = ( time_t ) 0;
/* SOM network instance, lazily deserialized from config->netfile by the neural thread */
PRIVATE som_network_t *net = NULL;
/**
 * \brief Train the neural network taking the alerts from the latest serialization time
 *
 * Queries the output database for all the alerts logged after
 * latest_serialization_time and normalizes each row into a vector of
 * SOM_NUM_ITEMS doubles in [0,1] (indexed by the som_* enum) suitable as
 * SOM input.  Aborts through AI_fatal_err() on any database or
 * allocation failure.
 */
PRIVATE void
AI_som_train ()
{
unsigned long snort_id = 0;
double **inputs;
char query[1024] = { 0 };
size_t i = 0,
num_rows = 0;
DB_result res;
DB_row row;
/* The alerts are read from the *output* database (DB_out_* macros in db.h) */
if ( !DB_out_init() )
{
AI_fatal_err ( "Unable to connect to the database specified in module configuration", __FILE__, __LINE__ );
}
/* Same logical query for both backends; only the epoch-extraction SQL differs */
#ifdef HAVE_LIBMYSQLCLIENT
snprintf ( query, sizeof ( query ),
"SELECT gid, sid, rev, timestamp, ip_src_addr, ip_dst_addr, tcp_src_port, tcp_dst_port "
"FROM %s a JOIN %s ip JOIN %s tcp "
"ON a.ip_hdr=ip.ip_hdr_id AND a.tcp_hdr=tcp.tcp_hdr_id "
"WHERE unix_timestamp(timestamp) > %lu",
outdb_config[ALERTS_TABLE], outdb_config[IPV4_HEADERS_TABLE], outdb_config[TCP_HEADERS_TABLE],
latest_serialization_time
);
#elif HAVE_LIBPQ
snprintf ( query, sizeof ( query ),
"SELECT gid, sid, rev, timestamp, ip_src_addr, ip_dst_addr, tcp_src_port, tcp_dst_port "
"FROM %s a JOIN %s ip JOIN %s tcp "
"ON a.ip_hdr=ip.ip_hdr_id AND a.tcp_hdr=tcp.tcp_hdr_id "
"WHERE date_part ('epoch', \"timestamp\"(timestamp)) > %lu",
outdb_config[ALERTS_TABLE], outdb_config[IPV4_HEADERS_TABLE], outdb_config[TCP_HEADERS_TABLE],
latest_serialization_time
);
#endif
if ( !( res = (DB_result) DB_out_query ( query )))
{
AI_fatal_err ( "AIPreproc: Query error", __FILE__, __LINE__ );
}
num_rows = DB_out_num_rows();
/* NOTE(review): alloca size depends on the unbounded alert row count -- a large
 * backlog can overflow the stack; consider malloc/free instead */
if ( !( inputs = (double**) alloca ( num_rows * sizeof ( double* ))))
{
AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
}
for ( i=0; i < num_rows; i++ )
{
row = (DB_row) DB_fetch_row ( res );
snort_id = 0;
if ( !( inputs[i] = (double*) alloca ( SOM_NUM_ITEMS * sizeof ( double ))))
{
AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
}
/* Pack (gid, sid) into one 32-bit id, then scale every feature to [0,1] */
snort_id = (( strtoul ( row[0], NULL, 10 ) & 0xFFFF ) << 16 ) | ( strtoul ( row[1], NULL, 10 ) & 0xFFFF );
inputs[i][som_alert_id] = (double) snort_id / (double) UINT_MAX;
/* NOTE(review): strtol assumes row[3] (the raw "timestamp" column) arrives as
 * an epoch number -- verify against the backend's column format */
inputs[i][som_time] = (double) strtol ( row[3], NULL, 10 ) / (double) INT_MAX;
inputs[i][som_src_ip] = (double) ntohl ( inet_addr ( row[4] )) / (double) UINT_MAX;
inputs[i][som_dst_ip] = (double) ntohl ( inet_addr ( row[5] )) / (double) UINT_MAX;
inputs[i][som_src_port] = (double) strtol ( row[6], NULL, 10 ) / (double) USHRT_MAX;
inputs[i][som_dst_port] = (double) strtol ( row[7], NULL, 10 ) / (double) USHRT_MAX;
}
DB_free_result ( res );
/* NOTE(review): `inputs` is filled but never passed to som_train() in this
 * function -- presumably the training call is still to be added; TODO confirm */
} /* ----- end of function AI_som_train ----- */
/**
 * \brief Thread for managing the self-organizing map (SOM) neural network for the alert correlation
 */
void*
AI_neural_thread ( void *arg )
{
/* Set when the on-disk network is missing or stale and a (re)training pass is needed */
BOOL do_train = false;
FILE *fp = NULL;
struct stat st;
/* The netfile option is mandatory: it is where the serialized SOM lives */
if ( !config->netfile )
{
AI_fatal_err ( "AIPreproc: neural network thread launched but netfile option was not specified", __FILE__, __LINE__ );
}
if ( strlen ( config->netfile ) == 0 )
{
AI_fatal_err ( "AIPreproc: neural network thread launched but netfile option was not specified", __FILE__, __LINE__ );
}
while ( 1 )
{
/* A missing (or un-stat-able) network file forces a training pass */
if ( stat ( config->netfile, &st ) < 0 )
{
do_train = true;
}
if ( !do_train )
{
if ( !( fp = fopen ( config->netfile, "r" )))
{
AI_fatal_err ( "AIPreproc: The neural network file exists but it is not readable", __FILE__, __LINE__ );
}
/* The serialized file begins with the time_t of its last serialization.
 * NOTE(review): fread's return value is unchecked -- a short or failed read
 * leaves latest_serialization_time stale or partially written; verify */
fread ( &latest_serialization_time, sizeof ( time_t ), 1, fp );
/* If more than N seconds passed from the latest serialization, re-train the neural network */
if ( (int) ( time (NULL) - latest_serialization_time ) > config->neuralNetworkTrainingInterval )
{
do_train = true;
}
fclose ( fp );
}
if ( !do_train )
{
/* Network file is fresh: lazily deserialize it once, then poll again in 5 s */
if ( !net )
{
if ( !( net = som_deserialize ( config->netfile )))
{
AI_fatal_err ( "AIPreproc: Error in deserializing the neural network from the network file", __FILE__, __LINE__ );
}
}
sleep ( 5 );
continue;
}
/* NOTE(review): when do_train is true this iteration neither calls AI_som_train()
 * nor sleeps nor resets do_train, so the loop busy-spins at 100% CPU -- TODO
 * confirm the training call belongs here */
}
/* Unreachable: the loop above never terminates */
pthread_exit ((void*) 0);
return (void*) 0;
} /* ----- end of function AI_neural_thread ----- */
#endif
/** @} */

View file

@ -32,15 +32,6 @@
#include <alloca.h>
#include <pthread.h>
/** Enumeration describing the tables in the output database */
enum { ALERTS_TABLE, IPV4_HEADERS_TABLE, TCP_HEADERS_TABLE, PACKET_STREAMS_TABLE, CLUSTERED_ALERTS_TABLE, CORRELATED_ALERTS_TABLE, N_TABLES };
/** Tables in the output database */
static const char *outdb_config[] = {
"ca_alerts", "ca_ipv4_headers", "ca_tcp_headers",
"ca_packet_streams", "ca_clustered_alerts", "ca_correlated_alerts"
};
/** Hash table built as cache for the couple of alerts already belonging to the same cluster,
* for avoiding more queries on the database*/
typedef struct {

View file

@ -92,6 +92,7 @@ static void AI_init(char *args)
pthread_t cleanup_thread,
logparse_thread,
webserv_thread,
neural_thread,
correlation_thread;
tSfPolicyId policy_id = _dpd.getParserPolicy();
@ -146,6 +147,14 @@ static void AI_init(char *args)
}
}
/* If neural_network_training_interval != 0, start the thread for the neural network */
if ( config->neuralNetworkTrainingInterval != 0 )
{
if ( pthread_create ( &neural_thread, NULL, AI_neural_thread, NULL ) != 0 )
{
AI_fatal_err ( "Failed to create the neural network thread", __FILE__, __LINE__ );
}
}
/* Register the preprocessor function, Transport layer, ID 10000 */
_dpd.addPreproc(AI_process, PRIORITY_TRANSPORT, 10000, PROTO_BIT__TCP | PROTO_BIT__UDP);
DEBUG_WRAP(_dpd.debugMsg(DEBUG_PLUGIN, "Preprocessor: AI is initialized\n"););
@ -162,10 +171,10 @@ static AI_config * AI_parse(char *args)
char *arg;
char *match;
char alertfile[1024] = { 0 },
alert_history_file[1024] = { 0 },
clusterfile[1024] = { 0 },
corr_rules_dir[1024] = { 0 },
corr_alerts_dir[1024] = { 0 },
alert_history_file[1024] = { 0 },
webserv_dir[1024] = { 0 },
webserv_banner[1024] = { 0 };
@ -204,7 +213,9 @@ static AI_config * AI_parse(char *args)
alert_clustering_interval = 0,
database_parsing_interval = 0,
correlation_graph_interval = 0,
manual_correlations_parsing_interval = 0;
manual_correlations_parsing_interval = 0,
neural_network_training_interval = 0,
output_neurons_per_side = 0;
BOOL has_cleanup_interval = false,
has_stream_expire_interval = false,
@ -486,6 +497,48 @@ static AI_config * AI_parse(char *args)
config->clusterMaxAlertInterval = cluster_max_alert_interval;
_dpd.logMsg( " Cluster alert max interval: %u\n", config->clusterMaxAlertInterval );
/* Parsing the neural_network_training_interval option */
if (( arg = (char*) strcasestr( args, "neural_network_training_interval" ) ))
{
for ( arg += strlen("neural_network_training_interval");
*arg && (*arg < '0' || *arg > '9');
arg++ );
if ( !(*arg) )
{
AI_fatal_err ( "neural_network_training_interval option used but "
"no value specified", __FILE__, __LINE__ );
}
neural_network_training_interval = strtoul ( arg, NULL, 10 );
} else {
neural_network_training_interval = DEFAULT_NEURAL_NETWORK_TRAINING_INTERVAL;
}
config->neuralNetworkTrainingInterval = neural_network_training_interval;
_dpd.logMsg( " Neural network training interval: %u\n", config->neuralNetworkTrainingInterval );
/* Parsing the output_neurons_per_side option */
if (( arg = (char*) strcasestr( args, "output_neurons_per_side" ) ))
{
for ( arg += strlen("output_neurons_per_side");
*arg && (*arg < '0' || *arg > '9');
arg++ );
if ( !(*arg) )
{
AI_fatal_err ( "output_neurons_per_side option used but "
"no value specified", __FILE__, __LINE__ );
}
output_neurons_per_side = strtoul ( arg, NULL, 10 );
} else {
output_neurons_per_side = DEFAULT_OUTPUT_NEURONS_PER_SIDE;
}
config->outputNeuronsPerSide = output_neurons_per_side;
_dpd.logMsg( " Output neurons per side: %u\n", config->outputNeuronsPerSide );
/* Parsing the alertfile option */
if (( arg = (char*) strcasestr( args, "alertfile" ) ))
{
@ -693,6 +746,22 @@ static AI_config * AI_parse(char *args)
_dpd.logMsg(" webserv_dir: %s\n", config->webserv_dir);
/* Neural network output file */
if ( config->neuralNetworkTrainingInterval != 0 )
{
#ifndef HAVE_DB
AI_fatal_err ( "Neural network based correlation support set but the module was compiled with no database support "
"(recompile the module with database support or set the neural_network_training_interval option in snort.conf to 0",
__FILE__, __LINE__ );
#endif
#ifndef HAVE_CONFIG_H
AI_fatal_err ( "Unable to read PREFIX from config.h", __FILE__, __LINE__ );
#endif
snprintf ( config->netfile, sizeof ( config->netfile ), "%s/share/snort_ai_preprocessor/som.dat", PREFIX );
}
/* Parsing the webserv_banner option */
if (( arg = (char*) strcasestr( args, "webserv_banner" ) ))
{

View file

@ -75,12 +75,19 @@
/** Default interval in seconds between an invocation of the thread for parsing XML manual correlations and the next one */
#define DEFAULT_MANUAL_CORRELATIONS_PARSING_INTERVAL 120
/** Default interval in seconds between a training loop for the neural network for
* alert correlations and the next one (this value should usually be high) */
#define DEFAULT_NEURAL_NETWORK_TRAINING_INTERVAL 43200
/** Default interval of validity in seconds for an entry in the cache of correlated alerts */
#define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600
/** Default maximum interval, in seconds, between two alerts for being considered in the same cluster */
#define DEFAULT_CLUSTER_MAX_ALERT_INTERVAL 14400
/** Default number of neurons per side on the output matrix of the SOM neural network */
#define DEFAULT_OUTPUT_NEURONS_PER_SIDE 20
/** Default web server port */
#define DEFAULT_WEBSERV_PORT 7654
@ -147,22 +154,22 @@ struct pkt_info
typedef struct
{
/** Interval in seconds for the stream cleanup thread */
unsigned long hashCleanupInterval;
unsigned long hashCleanupInterval;
/** Interval in seconds for considering an idle stream timed out */
unsigned long streamExpireInterval;
unsigned long streamExpireInterval;
/** Interval in seconds for the alert clustering thread */
unsigned long alertClusteringInterval;
unsigned long alertClusteringInterval;
/** Interval in seconds for reading the alert database, if database logging is used */
unsigned long databaseParsingInterval;
unsigned long databaseParsingInterval;
/** Interval in seconds for running the thread for building alert correlation graphs */
unsigned long correlationGraphInterval;
unsigned long correlationGraphInterval;
/** Interval in seconds between a serialization of the alerts' buffer and the next one */
unsigned long alertSerializationInterval;
unsigned long alertSerializationInterval;
/** Interval in seconds between two alerts (a,b) for considering them correlated */
unsigned long bayesianCorrelationInterval;
@ -176,8 +183,15 @@ typedef struct
/** Interval in seconds for which an entry in the cache of correlated alerts is valid */
unsigned long bayesianCorrelationCacheValidity;
/** Interval in seconds between a training loop for the neural network for
* alert correlations and the next one (this value should usually be high) */
unsigned long neuralNetworkTrainingInterval;
/** Number of neurons per side on the output matrix of the SOM neural network */
unsigned long outputNeuronsPerSide;
/** Size of the alerts' buffer to be periodically sent to the serialization thread */
unsigned long alert_bufsize;
unsigned long alert_bufsize;
/** Correlation threshold coefficient for correlating two hyperalerts. Two hyperalerts
* are 'correlated' to each other in a multi-step attack graph if and only if their
@ -215,6 +229,9 @@ typedef struct
/** Directory where the correlated alerts' information will be placed */
char corr_alerts_dir[1024];
/** File keeping the serialized neural network used for the alert correlation */
char netfile[1024];
/** Database name, if database logging is used */
char dbname[256];
@ -410,6 +427,15 @@ typedef struct {
UT_hash_handle hh;
} AI_alert_correlation;
/*****************************************************************/
/** Enumeration describing the tables in the output database */
enum { ALERTS_TABLE, IPV4_HEADERS_TABLE, TCP_HEADERS_TABLE, PACKET_STREAMS_TABLE, CLUSTERED_ALERTS_TABLE, CORRELATED_ALERTS_TABLE, N_TABLES };
/** Tables in the output database */
static const char *outdb_config[] __attribute__ (( unused )) = {
"ca_alerts", "ca_ipv4_headers", "ca_tcp_headers",
"ca_packet_streams", "ca_clustered_alerts", "ca_correlated_alerts"
};
/*****************************************************************/
int preg_match ( const char*, char*, char***, int* );
char* str_replace ( char*, char*, char *);
@ -440,8 +466,9 @@ AI_snort_alert* AI_get_clustered_alerts ( void );
void AI_serialize_alerts ( AI_snort_alert**, unsigned int );
void* AI_deserialize_alerts ();
void* AI_alerts_pool_thread ( void *arg );
void* AI_serializer_thread ( void *arg );
void* AI_alerts_pool_thread ( void* );
void* AI_serializer_thread ( void* );
void* AI_neural_thread ( void* );
const AI_alert_event* AI_get_alert_events_by_key ( AI_alert_event_key );
unsigned int AI_get_history_alert_number ();
double AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b );