/*
 * =====================================================================================
 *
 *       Filename:  neural.c
 *
 *    Description:  Manage the alert correlation based on SOM neural network
 *
 *        Version:  0.1
 *        Created:  21/10/2010 08:51:28
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  BlackLight (http://0x00.ath.cx),
 *        Licence:  GNU GPL v.3
 *        Company:  DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
 *
 * =====================================================================================
 */

#include "spp_ai.h"

/** \defgroup neural Module for the neural network-based alert correlation
 * @{ */

#ifdef HAVE_DB

#include "db.h"
#include "fsom.h"

/* System headers (names inferred from the functions used in this module) */
#include <alloca.h>
#include <arpa/inet.h>
#include <limits.h>
#include <math.h>
#include <sys/stat.h>
#include <unistd.h>

/** Enumeration for the input fields of the SOM neural network */
enum {
    som_src_ip,
    som_dst_ip,
    som_src_port,
    som_dst_port,
    som_time,
    som_gid,
    som_sid,
    som_rev,
    SOM_NUM_ITEMS
};

PRIVATE time_t                latest_serialization_time = ( time_t ) 0;
PRIVATE som_network_t         *net = NULL;
PRIVATE AI_alerts_per_neuron  *alerts_per_neuron = NULL;
PRIVATE pthread_mutex_t       neural_mutex;

/**
 * \brief  Get the hash table containing the alerts associated to each output neuron
 * \return The hash table
 */

AI_alerts_per_neuron*
AI_get_alerts_per_neuron ()
{
    return alerts_per_neuron;
}       /* -----  end of function AI_get_alerts_per_neuron  ----- */

/**
 * \brief  Get the current weight of the neural correlation index, computed as a hyperbolic
 *         tangent whose argument depends on the current number of alerts in the database
 * \return The weight of the correlation index ( 0 <= weight < 1 )
 */

double
AI_neural_correlation_weight ()
{
    DB_result res;
    DB_row    row;

    char   query[1024] = { 0 };
    double x = 0,
           k = (double) config->alert_correlation_weight / HYPERBOLIC_TANGENT_SOLUTION;

    pthread_mutex_lock ( &outdb_mutex );

    if ( !DB_out_init() )
    {
        pthread_mutex_unlock ( &outdb_mutex );
        AI_fatal_err ( "Unable to connect to the database specified in module configuration", __FILE__, __LINE__ );
    }

    pthread_mutex_unlock ( &outdb_mutex );

    snprintf ( query, sizeof ( query ), "SELECT count(*) FROM %s", outdb_config[ALERTS_TABLE] );
    pthread_mutex_lock ( &outdb_mutex );

    if ( !( res = (DB_result) DB_out_query ( query )))
    {
        _dpd.errMsg ( "Warning: Database error while executing the query '%s'\n", query );
        pthread_mutex_unlock ( &outdb_mutex );
        return 0.0;
    }

    pthread_mutex_unlock ( &outdb_mutex );

    row = (DB_row) DB_fetch_row ( res );
    x = strtod ( row[0], NULL );
    DB_free_result ( res );

    /* tanh ( x/k ), written through exponentials */
    return (( exp(x/k) - exp(-x/k) ) / ( exp(x/k) + exp(-x/k) ));
}       /* -----  end of function AI_neural_correlation_weight  ----- */

/**
 * \brief  Convert an alert row fetched from the database to a vector suitable for being
 *         processed by the SOM neural network
 * \param  alert  AI_som_alert_tuple object identifying the alert tuple
 * \param  input  Reference to the vector that will contain the SOM input data
 */

PRIVATE void
__AI_alert_to_som_data ( const AI_som_alert_tuple alert, double **input )
{
    /* Each field is scaled to [0,1] by dividing it by the maximum value of its type */
    (*input)[som_gid]      = (double) alert.gid / (double) USHRT_MAX;
    (*input)[som_sid]      = (double) alert.sid / (double) USHRT_MAX;
    (*input)[som_rev]      = (double) alert.rev / (double) USHRT_MAX;
    (*input)[som_time]     = (double) alert.timestamp / (double) INT_MAX;
    (*input)[som_src_ip]   = (double) alert.src_ip_addr / (double) UINT_MAX;
    (*input)[som_dst_ip]   = (double) alert.dst_ip_addr / (double) UINT_MAX;
    (*input)[som_src_port] = (double) alert.src_port / (double) USHRT_MAX;
    (*input)[som_dst_port] = (double) alert.dst_port / (double) USHRT_MAX;
}       /* -----  end of function __AI_alert_to_som_data  ----- */
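/*
 * Illustrative sketch of the mapping above (not compiled, values are made up): an alert
 * with sid 2003 and source port 4444 would contribute 2003.0/65535.0 ~= 0.031 and
 * 4444.0/65535.0 ~= 0.068 to its SOM input vector.
 */
#if 0
    AI_som_alert_tuple example;             /* hypothetical tuple, filled elsewhere */
    double             vec[SOM_NUM_ITEMS] = { 0 };
    double             *input = vec;

    __AI_alert_to_som_data ( example, &input );
    /* vec[som_sid]      == (double) example.sid      / (double) USHRT_MAX */
    /* vec[som_src_port] == (double) example.src_port / (double) USHRT_MAX */
#endif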
/**
 * \brief  Get the distance between two alerts mapped on the SOM neural network
 * \param  alert1  Tuple identifying the first alert
 * \param  alert2  Tuple identifying the second alert
 * \return The distance between the alerts
 */

PRIVATE double
__AI_som_alert_distance ( const AI_som_alert_tuple alert1, const AI_som_alert_tuple alert2 )
{
    double *input1 = NULL,
           *input2 = NULL;

    size_t x1 = 0,
           y1 = 0,
           x2 = 0,
           y2 = 0;

    int  i;
    BOOL is_found = false;

    AI_alerts_per_neuron      *found = NULL;
    AI_alerts_per_neuron_key  key;

    if ( !( input1 = (double*) alloca ( SOM_NUM_ITEMS * sizeof ( double ))))
    {
        AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
    }

    if ( !( input2 = (double*) alloca ( SOM_NUM_ITEMS * sizeof ( double ))))
    {
        AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
    }

    if ( !net )
    {
        return 0.0;
    }

    __AI_alert_to_som_data ( alert1, &input1 );
    __AI_alert_to_som_data ( alert2, &input2 );

    pthread_mutex_lock ( &neural_mutex );

    som_set_inputs ( net, input1 );
    som_get_best_neuron_coordinates ( net, &x1, &y1 );

    som_set_inputs ( net, input2 );
    som_get_best_neuron_coordinates ( net, &x2, &y2 );

    pthread_mutex_unlock ( &neural_mutex );

    /* Check whether the hash table already holds an entry for each of the two winning
     * neurons; if not, create it, then append the alert to it unless it is already there */
    key.x = x1;
    key.y = y1;
    HASH_FIND ( hh, alerts_per_neuron, &key, sizeof ( key ), found );

    if ( !found )
    {
        if ( !( found = (AI_alerts_per_neuron*) calloc ( 1, sizeof ( AI_alerts_per_neuron ))))
        {
            AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
        }

        found->key = key;
        found->n_alerts = 1;

        if ( !( found->alerts = (AI_som_alert_tuple*) calloc ( 1, sizeof ( AI_som_alert_tuple ))))
        {
            AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
        }

        found->alerts[0] = alert1;
        HASH_ADD ( hh, alerts_per_neuron, key, sizeof ( key ), found );
    } else {
        is_found = false;

        for ( i=0; i < found->n_alerts && !is_found; i++ )
        {
            if ( alert1.gid == found->alerts[i].gid &&
                    alert1.sid == found->alerts[i].sid &&
                    alert1.rev == found->alerts[i].rev &&
                    alert1.src_ip_addr == found->alerts[i].src_ip_addr &&
                    alert1.dst_ip_addr == found->alerts[i].dst_ip_addr &&
                    alert1.src_port == found->alerts[i].src_port &&
                    alert1.dst_port == found->alerts[i].dst_port )
            {
                is_found = true;
            }
        }

        if ( !is_found )
        {
            if ( !( found->alerts = (AI_som_alert_tuple*) realloc ( found->alerts, (++(found->n_alerts)) * sizeof ( AI_som_alert_tuple ))))
            {
                AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
            }

            found->alerts[ found->n_alerts - 1 ] = alert1;
        }
    }

    key.x = x2;
    key.y = y2;
    HASH_FIND ( hh, alerts_per_neuron, &key, sizeof ( key ), found );

    if ( !found )
    {
        if ( !( found = (AI_alerts_per_neuron*) calloc ( 1, sizeof ( AI_alerts_per_neuron ))))
        {
            AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
        }

        found->key = key;
        found->n_alerts = 1;

        if ( !( found->alerts = (AI_som_alert_tuple*) calloc ( 1, sizeof ( AI_som_alert_tuple ))))
        {
            AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
        }

        found->alerts[0] = alert2;
        HASH_ADD ( hh, alerts_per_neuron, key, sizeof ( key ), found );
    } else {
        is_found = false;

        for ( i=0; i < found->n_alerts && !is_found; i++ )
        {
            if ( alert2.gid == found->alerts[i].gid &&
                    alert2.sid == found->alerts[i].sid &&
                    alert2.rev == found->alerts[i].rev &&
                    alert2.src_ip_addr == found->alerts[i].src_ip_addr &&
                    alert2.dst_ip_addr == found->alerts[i].dst_ip_addr &&
                    alert2.src_port == found->alerts[i].src_port &&
                    alert2.dst_port == found->alerts[i].dst_port )
            {
                is_found = true;
            }
        }

        if ( !is_found )
        {
            if ( !( found->alerts = (AI_som_alert_tuple*) realloc ( found->alerts, (++(found->n_alerts)) * sizeof ( AI_som_alert_tuple ))))
            {
                AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
            }

            found->alerts[ found->n_alerts - 1 ] = alert2;
        }
    }

    /* Return the Euclidean distance between the two winning neurons. The commented-out
     * version below would normalize it to [0,1], dividing by the maximum possible distance
     * on the output matrix (upper-left to bottom-right corner) */

    /* return sqrt ((double) ( (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1) )) / */
    /*  sqrt ((double) ( 2 * (config->outputNeuronsPerSide-1) * (config->outputNeuronsPerSide-1) )); */

    return sqrt ((double) ( (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1) ));
}       /* -----  end of function __AI_som_alert_distance  ----- */
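/*
 * Sketch (not compiled): how a consumer of AI_get_alerts_per_neuron() could walk the
 * per-neuron hash table populated above. The iteration relies on the standard uthash
 * hh.next chaining; the printf format specifiers and variable names are illustrative.
 */
#if 0
    AI_alerts_per_neuron *item = NULL;
    int j;

    for ( item = AI_get_alerts_per_neuron(); item != NULL; item = (AI_alerts_per_neuron*) item->hh.next )
    {
        for ( j=0; j < item->n_alerts; j++ )
        {
            printf ( "neuron (%d,%d): alert %u:%u:%u\n",
                    (int) item->key.x, (int) item->key.y,
                    (unsigned int) item->alerts[j].gid,
                    (unsigned int) item->alerts[j].sid,
                    (unsigned int) item->alerts[j].rev );
        }
    }
#endif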
/**
 * \brief  Get the SOM neural correlation between two alerts given as AI_snort_alert objects
 * \param  a  First alert
 * \param  b  Second alert
 * \return The correlation between a and b computed by the neural network
 */

double
AI_alert_neural_som_correlation ( const AI_snort_alert *a, const AI_snort_alert *b )
{
    AI_som_alert_tuple t1, t2;

    double distance     = 0.0,
           max_distance = 0.0;

    t1.gid = a->gid;
    t1.sid = a->sid;
    t1.rev = a->rev;
    t1.src_ip_addr = ntohl ( a->ip_src_addr );
    t1.dst_ip_addr = ntohl ( a->ip_dst_addr );
    t1.src_port = ntohs ( a->tcp_src_port );
    t1.dst_port = ntohs ( a->tcp_dst_port );
    t1.timestamp = a->timestamp;
    t1.desc = a->desc;

    t2.gid = b->gid;
    t2.sid = b->sid;
    t2.rev = b->rev;
    t2.src_ip_addr = ntohl ( b->ip_src_addr );
    t2.dst_ip_addr = ntohl ( b->ip_dst_addr );
    t2.src_port = ntohs ( b->tcp_src_port );
    t2.dst_port = ntohs ( b->tcp_dst_port );
    t2.timestamp = b->timestamp;
    t2.desc = b->desc;

    distance = __AI_som_alert_distance ( t1, t2 );
    max_distance = sqrt ((double) ( 2 * (config->outputNeuronsPerSide-1) * (config->outputNeuronsPerSide-1) ));

    /* Alerts mapped on opposite corners of the output matrix are considered uncorrelated,
     * otherwise the correlation decays as 1 / ( 1 + distance ) */
    return (( distance == max_distance ) ? 0.0 : ( 1.0 / ( 1.0 + distance )));
}       /* -----  end of function AI_alert_neural_som_correlation  ----- */
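/*
 * Worked values for the mapping above: two alerts falling on the same output neuron
 * (distance 0) yield a correlation of 1.0, neighbouring neurons at distance 1 yield 0.5,
 * distance 2 yields 1/3, and only the corner-to-corner case (distance == max_distance)
 * is forced to 0.0.
 */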
/**
 * \brief  Train the neural network on the alerts gathered since the latest serialization time
 */

PRIVATE void
__AI_som_train ()
{
    double **inputs = NULL;

    char   query[1024] = { 0 };
    size_t i = 0,
           num_rows = 0;

    DB_result res;
    DB_row    row;
    AI_som_alert_tuple *tuples = NULL;

    pthread_mutex_lock ( &outdb_mutex );

    if ( !DB_out_init() )
    {
        pthread_mutex_unlock ( &outdb_mutex );
        AI_fatal_err ( "Unable to connect to the database specified in module configuration", __FILE__, __LINE__ );
    }

    pthread_mutex_unlock ( &outdb_mutex );

#ifdef HAVE_LIBMYSQLCLIENT
    snprintf ( query, sizeof ( query ),
        "SELECT gid, sid, rev, unix_timestamp(timestamp), ip_src_addr, ip_dst_addr, tcp_src_port, tcp_dst_port "
        "FROM (%s a LEFT JOIN %s ip ON a.ip_hdr=ip.ip_hdr_id) LEFT JOIN %s tcp "
        "ON a.tcp_hdr=tcp.tcp_hdr_id "
        "WHERE unix_timestamp(timestamp) >= %lu",
        outdb_config[ALERTS_TABLE], outdb_config[IPV4_HEADERS_TABLE], outdb_config[TCP_HEADERS_TABLE],
        latest_serialization_time );
#elif HAVE_LIBPQ
    snprintf ( query, sizeof ( query ),
        "SELECT gid, sid, rev, date_part('epoch', \"timestamp\"(timestamp)), ip_src_addr, ip_dst_addr, tcp_src_port, tcp_dst_port "
        "FROM (%s a LEFT JOIN %s ip ON a.ip_hdr=ip.ip_hdr_id) LEFT JOIN %s tcp "
        "ON a.tcp_hdr=tcp.tcp_hdr_id "
        "WHERE date_part ('epoch', \"timestamp\"(timestamp)) >= %lu",
        outdb_config[ALERTS_TABLE], outdb_config[IPV4_HEADERS_TABLE], outdb_config[TCP_HEADERS_TABLE],
        latest_serialization_time );
#endif

    pthread_mutex_lock ( &outdb_mutex );

    if ( !( res = (DB_result) DB_out_query ( query )))
    {
        _dpd.errMsg ( "Warning: Database error while executing the query '%s'\n", query );
        pthread_mutex_unlock ( &outdb_mutex );
        return;
    }

    pthread_mutex_unlock ( &outdb_mutex );
    num_rows = DB_num_rows ( res );

    if ( num_rows == 0 )
    {
        DB_free_result ( res );
        latest_serialization_time = time ( NULL );
        return;
    }

    if ( !( inputs = (double**) alloca ( num_rows * sizeof ( double* ))))
    {
        AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
    }

    if ( !( tuples = (AI_som_alert_tuple*) alloca ( num_rows * sizeof ( AI_som_alert_tuple ))))
    {
        AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
    }

    for ( i=0; i < num_rows; i++ )
    {
        row = (DB_row) DB_fetch_row ( res );

        tuples[i].gid = row[0] ? strtoul ( row[0], NULL, 10 ) : 0;
        tuples[i].sid = row[1] ? strtoul ( row[1], NULL, 10 ) : 0;
        tuples[i].rev = row[2] ? strtoul ( row[2], NULL, 10 ) : 0;
        tuples[i].timestamp = row[3] ? (time_t) strtol ( row[3], NULL, 10 ) : (time_t) 0;
        tuples[i].src_ip_addr = row[4] ? ntohl ( inet_addr ( row[4] )) : 0;
        tuples[i].dst_ip_addr = row[5] ? ntohl ( inet_addr ( row[5] )) : 0;
        tuples[i].src_port = row[6] ? (uint16_t) strtoul ( row[6], NULL, 10 ) : 0;
        tuples[i].dst_port = row[7] ? (uint16_t) strtoul ( row[7], NULL, 10 ) : 0;

        if ( !( inputs[i] = (double*) alloca ( SOM_NUM_ITEMS * sizeof ( double ))))
        {
            AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
        }

        __AI_alert_to_som_data ( tuples[i], &inputs[i] );
    }

    DB_free_result ( res );
    pthread_mutex_lock ( &neural_mutex );

    if ( !net )
    {
        if ( !( net = som_network_new ( SOM_NUM_ITEMS, config->outputNeuronsPerSide, config->outputNeuronsPerSide )))
        {
            pthread_mutex_unlock ( &neural_mutex );
            AI_fatal_err ( "AIPreproc: Could not create the neural network", __FILE__, __LINE__ );
        }

        som_init_weights ( net, inputs, num_rows );
        som_train ( net, inputs, num_rows, config->neural_train_steps );
    } else {
        som_train ( net, inputs, num_rows, config->neural_train_steps );
    }

    pthread_mutex_unlock ( &neural_mutex );

    latest_serialization_time = time ( NULL );
    net->serialization_time = latest_serialization_time;
    som_serialize ( net, config->netfile );
}       /* -----  end of function __AI_som_train  ----- */
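/*
 * Stand-alone sketch of the fsom calls used by __AI_som_train() (not compiled; toy
 * 2-sample, 3x3 map with made-up values and a hypothetical output path): create the map,
 * initialize its weights from the samples, train it and serialize it to disk.
 */
#if 0
    double sample1[SOM_NUM_ITEMS] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8 };
    double sample2[SOM_NUM_ITEMS] = { 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1 };
    double *samples[2] = { sample1, sample2 };
    som_network_t *toy = NULL;

    if (( toy = som_network_new ( SOM_NUM_ITEMS, 3, 3 )))
    {
        som_init_weights ( toy, samples, 2 );
        som_train ( toy, samples, 2, 10 );
        som_serialize ( toy, "/tmp/toy.som" );
    }
#endif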
/**
 * \brief  Thread managing the self-organizing map (SOM) neural network used for the alert correlation
 */

void*
AI_neural_thread ( void *arg )
{
    struct stat st;
    BOOL do_train = false;
    pthread_t neural_clustering_thread;

    pthread_mutex_init ( &neural_mutex, NULL );

    if ( !config->netfile || strlen ( config->netfile ) == 0 )
    {
        AI_fatal_err ( "AIPreproc: neural network thread launched but netfile option was not specified", __FILE__, __LINE__ );
    }

    if ( config->neuralClusteringInterval != 0 )
    {
        if ( pthread_create ( &neural_clustering_thread, NULL, AI_neural_clustering_thread, NULL ) != 0 )
        {
            AI_fatal_err ( "Failed to create the neural clustering thread", __FILE__, __LINE__ );
        }
    }

    while ( 1 )
    {
        do_train = false;

        if ( stat ( config->netfile, &st ) < 0 )
        {
            /* No serialized network on disk yet: train from scratch */
            do_train = true;
        } else {
            if ( !( net = som_deserialize ( config->netfile )))
            {
                AI_fatal_err ( "AIPreproc: Error in deserializing the neural network from the network file", __FILE__, __LINE__ );
            }

            /* If more than N seconds passed since the latest serialization, re-train the neural network */
            if ( (int) ( time (NULL) - net->serialization_time ) > config->neuralNetworkTrainingInterval )
            {
                do_train = true;
            }
        }

        if ( do_train )
        {
            __AI_som_train();
        }

        sleep ( config->neuralNetworkTrainingInterval );
    }

    pthread_exit ((void*) 0);
    return (void*) 0;
}       /* -----  end of function AI_neural_thread  ----- */

#endif

/** @} */
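/*
 * AI_neural_thread() is designed to be spawned once by the preprocessor setup code,
 * e.g. (hypothetical launch site):
 *
 *     pthread_t neural_thread;
 *
 *     if ( pthread_create ( &neural_thread, NULL, AI_neural_thread, NULL ) != 0 )
 *         AI_fatal_err ( "Failed to create the neural network thread", __FILE__, __LINE__ );
 */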