diff --git a/README b/README index b653425..61468fd 100644 --- a/README +++ b/README @@ -188,12 +188,14 @@ preprocessor ai: \ database_parsing_interval 30 \ hashtable_cleanup_interval 300 \ manual_correlations_parsing_interval 120 \ + max_hash_pkt_number 1000 \ neural_clustering_interval 1200 \ neural_network_training_interval 43200 \ neural_train_steps 10 \ output_database ( type="dbtype", name="snort", user="snortusr", password="snortpass", host="dbhost" ) \ output_neurons_per_side 20 \ tcp_stream_expire_interval 300 \ + use_stream_hash_table 1 \ webserv_banner "Snort AIPreprocessor module" \ webserv_dir "/prefix/share/htdocs" \ webserv_port 7654 @@ -323,7 +325,13 @@ the alerts from database and the next one (default if not specified: 30 seconds) - hashtable_cleanup_interval: The interval that should occur from the cleanup of the hashtable of TCP streams and the next one (default if not specified: 300 - seconds) +seconds). Set this option to 0 for performing no cleanup on the stream hash +table + + +- max_hash_pkt_number: Maximum number of packets that each element of the stream +hash table should hold, set it to 0 for no limit (default value if not +specified: 1000) - manual_correlations_parsing_interval: Interval in seconds between an execution @@ -370,6 +378,12 @@ stream as "expired", if no more packets are received inside of that and it's not "marked" as suspicious (default if not specified: 300 seconds) +- use_stream_hash_table: Set this option to 0 if you do not want to use the hash +table for storing the streams of packets associated to alerts, this is a good +choice on a system where many alerts are triggered (default value if not +specified: 1) + + - webserv_banner: Banner of the web server, to be placed on the error pages and in the "Server" HTTP reply header diff --git a/alert_parser.c b/alert_parser.c index e770ff6..9abb40b 100644 --- a/alert_parser.c +++ b/alert_parser.c @@ -80,8 +80,6 @@ AI_serializer ( AI_snort_alert *alert ) void* AI_alerts_pool_thread ( void *arg ) { - pthread_t serializer_thread; - while ( 1 ) { sleep ( config->alertSerializationInterval ); @@ -138,8 +136,6 @@ AI_file_alertparser_thread ( void* arg ) BOOL in_alert = false; pthread_t alerts_pool_thread; - pthread_t serializer_thread; - pthread_t db_thread; /* Initialize the mutex lock, so nobody can read the alerts while we write there */ pthread_mutex_init ( &alert_mutex, NULL ); diff --git a/cluster.c b/cluster.c index f9626a3..d623965 100644 --- a/cluster.c +++ b/cluster.c @@ -262,7 +262,6 @@ __AI_merge_alerts ( AI_snort_alert **log ) { AI_snort_alert *tmp, *tmp2, *tmp3; AI_alerts_couple *alerts_couple; - pthread_t db_thread; int count = 0; for ( tmp = *log; tmp; tmp = tmp->next ) diff --git a/correlation.c b/correlation.c index 5f4dc6a..80d7654 100644 --- a/correlation.c +++ b/correlation.c @@ -1238,8 +1238,7 @@ AI_alert_correlation_thread ( void *arg ) AI_snort_alert *alert_iterator = NULL, *alert_iterator2 = NULL; - pthread_t db_thread, - manual_corr_thread; + pthread_t manual_corr_thread; #ifdef HAVE_LIBGVC char corr_png_file[4096] = { 0 }; diff --git a/db.c b/db.c index 4782641..00baba0 100644 --- a/db.c +++ b/db.c @@ -45,7 +45,6 @@ AI_db_alertparser_thread ( void *arg ) int latest_cid = 0; time_t latest_time = time ( NULL ); pthread_t alerts_pool_thread; - pthread_t serializer_thread; DB_result res, res2; DB_row row, row2; diff --git a/spp_ai.c b/spp_ai.c index 7d0f2ef..ebaa122 100644 --- a/spp_ai.c +++ b/spp_ai.c @@ -218,11 +218,13 @@ static AI_config * AI_parse(char *args) correlation_graph_interval = 0, database_parsing_interval = 0, manual_correlations_parsing_interval = 0, + max_hash_pkt_number = 0, neural_clustering_interval = 0, neural_network_training_interval = 0, neural_train_steps = 0, output_neurons_per_side = 0, stream_expire_interval = 0, + use_stream_hash_table = 0, webserv_banner_len = 0, webserv_dir_len = 0; @@ -591,6 +593,48 @@ static AI_config * AI_parse(char *args) config->neural_train_steps = neural_train_steps; _dpd.logMsg( " Neural train steps: %u\n", config->neural_train_steps ); + /* Parsing the max_hash_pkt_number option */ + if (( arg = (char*) strcasestr( args, "max_hash_pkt_number" ) )) + { + for ( arg += strlen("max_hash_pkt_number"); + *arg && (*arg < '0' || *arg > '9'); + arg++ ); + + if ( !(*arg) ) + { + AI_fatal_err ( "max_hash_pkt_number option used but " + "no value specified", __FILE__, __LINE__ ); + } + + max_hash_pkt_number = strtoul ( arg, NULL, 10 ); + } else { + max_hash_pkt_number = DEFAULT_MAX_HASH_PKT_NUMBER; + } + + config->max_hash_pkt_number = max_hash_pkt_number; + _dpd.logMsg( " Maximum number of packets stored in the hash table: %u\n", config->max_hash_pkt_number ); + + /* Parsing the use_stream_hash_table option */ + if (( arg = (char*) strcasestr( args, "use_stream_hash_table" ) )) + { + for ( arg += strlen("use_stream_hash_table"); + *arg && (*arg < '0' || *arg > '9'); + arg++ ); + + if ( !(*arg) ) + { + AI_fatal_err ( "use_stream_hash_table option used but " + "no value specified", __FILE__, __LINE__ ); + } + + use_stream_hash_table = strtoul ( arg, NULL, 10 ); + } else { + use_stream_hash_table = DEFAULT_USE_STREAM_HASH_TABLE; + } + + config->use_stream_hash_table = use_stream_hash_table; + _dpd.logMsg( " Using the stream hash table: %u\n", config->use_stream_hash_table ); + /* Parsing the alert_correlation_weight option */ if (( arg = (char*) strcasestr( args, "alert_correlation_weight" ) )) { diff --git a/spp_ai.h b/spp_ai.h index 8384115..f0cbe24 100644 --- a/spp_ai.h +++ b/spp_ai.h @@ -98,11 +98,18 @@ /** Default number of steps used for training the neural network */ #define DEFAULT_NEURAL_TRAIN_STEPS 10 +/** Default maximum number of packets that an observed stream in the hash table should hold */ +#define DEFAULT_MAX_HASH_PKT_NUMBER 1000 + /** Default number of alerts needed in the history file or database for letting a certain * heuristic correlation index weight be =~ 0.95 (the weight monotonically increases * with the number of alerts according to a hyperbolic tangent function) */ #define DEFAULT_ALERT_CORRELATION_WEIGHT 5000 +/** Default setting for the use of the hash table for holding streams of packets + * associated to a certain alert (0 = do not use, 1 or any value != 0: use) */ +#define DEFAULT_USE_STREAM_HASH_TABLE 1 + /** Default web server port */ #define DEFAULT_WEBSERV_PORT 7654 @@ -164,6 +171,9 @@ struct pkt_info /** Flag set if the packet is observed, i.e. associated to a security alert */ BOOL observed; + /** Number of packets in the current flow, if available */ + unsigned int n_packets; + /** Make the struct 'hashable' */ UT_hash_handle hh; }; @@ -218,11 +228,18 @@ typedef struct * with the number of alerts according to a hyperbolic tangent function) */ unsigned long alert_correlation_weight; + /** Maximum number of packets that an observed stream in the hash table should hold */ + unsigned long max_hash_pkt_number; + /** Number of steps used for training the neural network */ unsigned long neural_train_steps; /** Size of the alerts' buffer to be periodically sent to the serialization thread */ unsigned long alert_bufsize; + + /** Setting for the use of the hash table for holding streams of packets + * associated to a certain alert (0 = do not use, 1 or any value != 0: use) */ + unsigned long use_stream_hash_table; /** Correlation threshold coefficient for correlating two hyperalerts. Two hyperalerts * are 'correlated' to each other in a multi-step attack graph if and only if their diff --git a/stream.c b/stream.c index 7c899b8..e040934 100644 --- a/stream.c +++ b/stream.c @@ -98,8 +98,16 @@ __AI_stream_free ( struct pkt_info* stream ) void* AI_hashcleanup_thread ( void* arg ) { - struct pkt_info *h, *stream; - time_t max_timestamp; + struct pkt_info *h, *stream, *tmp; + time_t max_timestamp; + int pkt_count, pkt_rm; + BOOL has_old_streams; + + if ( config->hashCleanupInterval == 0 ) + { + pthread_exit ((void*) 0); + return (void*) 0; + } while ( 1 ) { /* Sleep for the specified number of seconds */ @@ -109,31 +117,69 @@ AI_hashcleanup_thread ( void* arg ) if ( !hash || !HASH_COUNT(hash) ) continue; - /* Check all the streams in the hash */ - for ( h = hash; h; h = (struct pkt_info*) h->next ) { - if ( h->observed ) continue; - max_timestamp = 0; + has_old_streams = true; - /* Find the maximum timestamp in the flow */ - for ( stream = h; stream; stream = (struct pkt_info*) stream->next ) { - if ( stream->timestamp > max_timestamp ) - max_timestamp = stream->timestamp; - } + while ( has_old_streams ) + { + has_old_streams = false; - /* If the most recent packet in the stream is older than the specified threshold, remove that stream */ - if ( time(NULL) - max_timestamp > config->streamExpireInterval ) { - stream = h; - - if ( stream ) + /* Check all the streams in the hash */ + for ( h = hash; h; h = (struct pkt_info*) h->next ) { + if ( h->observed ) { - /* XXX This, sometimes, randomly, leads to the crash of the module. - * WHY??? Why can't computer science be deterministic, and if a - * certain not-NULL stream exists in a not-NULL hash table why - * should there be a crash, one day yes and the next one no? Until - * I won't find an answer to these enigmatic questions, I will leave - * this code commented, so if a certain stream goes timeout it won't - * be removed. I'm sorry but it's not my fault. Ask the karma about this */ - /* __AI_stream_free ( stream ); */ + continue; + } + + if ( h->next ) + { + if ( h->next->observed ) + { + if ( config->max_hash_pkt_number != 0 ) + { + if ( h->next->n_packets == 0 ) + { + for ( stream = h->next, pkt_count=0; stream; stream = (struct pkt_info*) stream->next, pkt_count++ ); + h->next->n_packets = pkt_count; + } else { + pkt_count = h->next->n_packets; + } + + /* If this stream has too many packets inside, remove the oldest ones */ + if ( pkt_count > config->max_hash_pkt_number ) + { + for ( stream = h->next, pkt_rm = 0; + stream && pkt_rm < pkt_count - config->max_hash_pkt_number; + stream = stream->next, pkt_rm++ ) + { + tmp = stream->next; + __AI_stream_free ( stream ); + stream = tmp; + } + + h->next = stream; + } + } + } + } + + max_timestamp = 0; + + /* Find the maximum timestamp in the flow */ + for ( stream = h; stream; stream = (struct pkt_info*) stream->next ) { + if ( stream->timestamp > max_timestamp ) + max_timestamp = stream->timestamp; + } + + /* If the most recent packet in the stream is older than the specified threshold, remove that stream */ + + if ( time (NULL) - max_timestamp > config->streamExpireInterval ) { + has_old_streams = true; + stream = h; + + if ( stream ) + { + __AI_stream_free ( stream ); + } } } } @@ -163,12 +209,18 @@ AI_pkt_enqueue ( SFSnortPacket* pkt ) if ( start_time == 0 ) start_time = time (NULL); + /* If we are not using the stream hash table, just return */ + if ( config->use_stream_hash_table == 0 ) + return; + /* If this is not an IP and/or TCP packet, it's not for me */ if ( !( pkt->ip4_header && pkt->tcp_header )) return; if ( !( info = (struct pkt_info*) malloc( sizeof(struct pkt_info) )) ) + { AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } memset ( &key, 0, sizeof(struct pkt_key)); key.src_ip = pkt->ip4_header->source.s_addr; @@ -177,10 +229,13 @@ AI_pkt_enqueue ( SFSnortPacket* pkt ) info->key = key; info->timestamp = time(NULL); info->observed = false; + info->n_packets = 0; info->next = NULL; if ( !( info->pkt = (SFSnortPacket*) malloc ( sizeof (SFSnortPacket) )) ) + { AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } memcpy ( info->pkt, pkt, sizeof (SFSnortPacket) ); @@ -193,8 +248,12 @@ AI_pkt_enqueue ( SFSnortPacket* pkt ) /* If there is already an element of this traffic stream in my hash table, * append the packet just received to this stream*/ if ( found ) { - /* If the current packet contains a RST, just deallocate the stream */ - if ( info->pkt->tcp_header->flags & TCPHEADER_RST ) { + /* If the current packet contains a RST or a FIN, just deallocate the stream */ + if ( + ( info->pkt->tcp_header->flags & TCPHEADER_RST ) || + (( info->pkt->tcp_header->flags & TCPHEADER_FIN ) && + ( info->pkt->tcp_header->flags & TCPHEADER_ACK )) + ) { pthread_mutex_lock ( &hash_mutex ); HASH_FIND ( hh, hash, &key, sizeof(struct pkt_key), found ); pthread_mutex_unlock ( &hash_mutex ); @@ -227,23 +286,6 @@ AI_pkt_enqueue ( SFSnortPacket* pkt ) if ( !tmp ) { found->next = info; } - - /* If the current packet contains an ACK and the latest one - * in this stream contained a FIN, then the communication - * on this stream is over */ - if ( found->pkt->tcp_header->flags & TCPHEADER_FIN ) { - if ( info->pkt->tcp_header->flags & TCPHEADER_ACK ) { - pthread_mutex_unlock ( &hash_mutex ); - HASH_FIND ( hh, hash, &key, sizeof(struct pkt_key), found ); - pthread_mutex_unlock ( &hash_mutex ); - - if ( found ) { - if ( !found->observed ) { - __AI_stream_free ( found ); - } - } - } - } } } else { /* If the packet contains the ACK flag, no payload and it is