diff --git a/README b/README index b8053f8..7c8b30e 100644 --- a/README +++ b/README @@ -160,6 +160,7 @@ preprocessor ai: \ correlation_threshold_coefficient 0.5 \ database ( type="mysql", name="snort", user="snortusr", password="snortpass", host="dbhost" ) \ database_parsing_interval 30 \ + cluster_max_alert_interval 14400 \ clusterfile "/your/snort/dir/log/clustered_alerts" \ cluster ( class="dst_port", name="privileged_ports", range="1-1023" ) \ cluster ( class="dst_port", name="unprivileged_ports", range="1024-65535" ) \ @@ -253,6 +254,11 @@ the alerts from database and the next one (default if not specified: 30 seconds) - clusterfile: File where the clustered alerts will be saved by the module (default if not specified: /var/log/snort/clustered_alerts) +- cluster_max_alert_interval: Maximum time interval, in seconds, allowed +between two alerts for them to be considered part of the same cluster (default: +14400 seconds, i.e. 4 hours). Specify 0 for this option if you want to +cluster alerts regardless of how much time elapsed between them + - cluster: Clustering hierarchy or list of hierarchies to be applied for grouping similar alerts. This option needs to specify: -- class: Class of the cluster node. It may be src_addr, dst_addr, src_port diff --git a/TODO b/TODO index 24f146f..391881b 100644 --- a/TODO +++ b/TODO @@ -2,12 +2,12 @@ AVERAGE/HIGH PRIORITY: ====================== -- Clustering alerts with time constraints +- Save clusters and correlations to db - Web interface - Code profiling +- Comment all the code!!! 
- Saving packet flows as .pcap - Neural network for computing k -- Isolating independant subgraphs from hyperalert correlation graphs - Testing more scenarios, making more hyperalert models ============= @@ -30,4 +30,5 @@ DONE: + Add alerts' history serialization to db.c as well + Bayesian learning among alerts in alert log + Split bayesian correlation out of correlation.c ++ Clustering alerts with time constraints diff --git a/alert_history.c b/alert_history.c index a7af208..e829038 100644 --- a/alert_history.c +++ b/alert_history.c @@ -19,6 +19,7 @@ #include "spp_ai.h" +#include #include /** \defgroup alert_history Manage the serialization and deserialization of alert history to the history file diff --git a/bayesian.c b/bayesian.c index 67030e8..18f18aa 100644 --- a/bayesian.c +++ b/bayesian.c @@ -159,7 +159,6 @@ AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b ) found->latest_computation_time = time ( NULL ); } - /* _dpd.logMsg ( "Correlation ('%s') -> ('%s'): %f\\n", a->desc, b->desc, corr ); */ return corr; } /* ----- end of function AI_alert_bayesian_correlation ----- */ diff --git a/cluster.c b/cluster.c index 7a4ef6b..9f225ad 100644 --- a/cluster.c +++ b/cluster.c @@ -213,7 +213,7 @@ _AI_get_min_hierarchy_node ( int val, hierarchy_node *root ) */ PRIVATE BOOL -_AI_equal_alarms ( AI_snort_alert *a1, AI_snort_alert *a2 ) +_AI_equal_alerts ( AI_snort_alert *a1, AI_snort_alert *a2 ) { if ( a1->gid != a2->gid || a1->sid != a2->sid || a1->rev != a2->rev ) { @@ -249,7 +249,7 @@ _AI_equal_alarms ( AI_snort_alert *a1, AI_snort_alert *a2 ) } return true; -} /* ----- end of function _AI_equal_alarms ----- */ +} /* ----- end of function _AI_equal_alerts ----- */ /** @@ -270,26 +270,24 @@ _AI_merge_alerts ( AI_snort_alert **log ) { if ( tmp2->next ) { - if ( !( - tmp->gid == tmp2->next->gid && - tmp->sid == tmp2->next->sid && - tmp->rev == tmp2->next->rev && - tmp->timestamp == tmp2->next->timestamp && - tmp->ip_src_addr == tmp2->next->ip_src_addr 
&& - tmp->ip_dst_addr == tmp2->next->ip_dst_addr && - tmp->tcp_src_port == tmp2->next->tcp_src_port && - tmp->tcp_dst_port == tmp2->next->tcp_dst_port )) + /* If the two alerts are in the same clustering time window (if a time window was defined...) */ + if ( config->clusterMaxAlertInterval == 0 || + ( config->clusterMaxAlertInterval > 0 && abs ( tmp->timestamp - tmp2->next->timestamp ) <= config->clusterMaxAlertInterval )) { - if ( _AI_equal_alarms ( tmp, tmp2->next )) + if ( tmp != tmp2->next ) { - if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* )))) - _dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ ); + /* If the two alerts are equal... */ + if ( _AI_equal_alerts ( tmp, tmp2->next )) + { + if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* )))) + _dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ ); - tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next; - count++; + tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next; + count++; - tmp3 = tmp2->next->next; - tmp2->next = tmp3; + tmp3 = tmp2->next->next; + tmp2->next = tmp3; + } } } diff --git a/spp_ai.c b/spp_ai.c index 9136487..d06fa9b 100644 --- a/spp_ai.c +++ b/spp_ai.c @@ -166,6 +166,7 @@ static AI_config * AI_parse(char *args) bayesian_correlation_interval = 0, bayesian_correlation_cache_validity = 0, clusterfile_len = 0, + cluster_max_alert_interval = 0, corr_rules_dir_len = 0, corr_alerts_dir_len = 0, alert_clustering_interval = 0, @@ -182,6 +183,7 @@ static AI_config * AI_parse(char *args) has_corr_rules_dir = false, has_clustering = false, has_database_log = false, + has_database_output = false, has_alert_history_file = false; if ( !( config = ( AI_config* ) malloc ( sizeof( AI_config )) )) @@ -388,6 
+390,27 @@ static AI_config * AI_parse(char *args) _dpd.logMsg( " Bayesian cache validity interval: %u\n", config->bayesianCorrelationCacheValidity ); + /* Parsing the cluster_max_alert_interval option */ + if (( arg = (char*) strcasestr( args, "cluster_max_alert_interval" ) )) + { + for ( arg += strlen("cluster_max_alert_interval"); + *arg && (*arg < '0' || *arg > '9'); + arg++ ); + + if ( !(*arg) ) + { + _dpd.fatalMsg("AIPreproc: cluster_max_alert_interval option used but " + "no value specified\n"); + } + + cluster_max_alert_interval = strtoul ( arg, NULL, 10 ); + } else { + cluster_max_alert_interval = DEFAULT_CLUSTER_MAX_ALERT_INTERVAL; + } + + config->clusterMaxAlertInterval = cluster_max_alert_interval; + _dpd.logMsg( " Cluster alert max interval: %u\n", config->clusterMaxAlertInterval ); + /* Parsing the alertfile option */ if (( arg = (char*) strcasestr( args, "alertfile" ) )) { @@ -550,7 +573,7 @@ static AI_config * AI_parse(char *args) } /* Parsing database option */ - if ( preg_match ( "\\s*database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 ) + if ( preg_match ( "\\s+database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 ) { if ( ! has_database_log ) has_database_log = true; @@ -627,8 +650,109 @@ static AI_config * AI_parse(char *args) { _dpd.fatalMsg ( "AIPreproc: Database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' option must be used)\n" ); } + + _dpd.logMsg(" Reading alerts from the database %s\n", config->dbname ); } + + /* Parsing output_database option */ + if ( preg_match ( "\\s*output_database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 ) + { + if ( ! 
has_database_output ) + has_database_output = true; + + match = strdup ( matches[0] ); + + for ( i=0; i < nmatches; i++ ) + free ( matches[i] ); + + free ( matches ); + matches = NULL; + + if ( preg_match ( "type\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 ) + { + if ( strcasecmp ( matches[0], "mysql" ) && strcasecmp ( matches[0], "postgresql" )) + { + _dpd.fatalMsg ( "AIPreproc: Not supported database '%s' (supported types: mysql, postgresql)\n", matches[0] ); + } + + if ( !strcasecmp ( matches[0], "mysql" )) + { + #ifndef HAVE_LIBMYSQLCLIENT + _dpd.fatalMsg ( "AIPreproc: mysql output set in 'output_database' option but the module was not compiled through --with-mysql option\n" ); + #else + config->outdbtype = mysql; + #endif + } else if ( !strcasecmp ( matches[0], "postgresql" )) { + #ifndef HAVE_LIBPQ + _dpd.fatalMsg ( "AIPreproc: postgresql output set in 'output_database' option but the module was not compiled through --with-postgresql option\n" ); + #else + config->outdbtype = postgresql; + #endif + } + + for ( i=0; i < nmatches; i++ ) + free ( matches[i] ); + + free ( matches ); + matches = NULL; + } + + if ( preg_match ( "name\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 ) + { + strncpy ( config->outdbname, matches[0], sizeof ( config->outdbname )); + + for ( i=0; i < nmatches; i++ ) + free ( matches[i] ); + + free ( matches ); + matches = NULL; + } + + if ( preg_match ( "user\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 ) + { + strncpy ( config->outdbuser, matches[0], sizeof ( config->outdbuser )); + + for ( i=0; i < nmatches; i++ ) + free ( matches[i] ); + + free ( matches ); + matches = NULL; + } + + if ( preg_match ( "password\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 ) + { + strncpy ( config->outdbpass, matches[0], sizeof ( config->outdbpass )); + + for ( i=0; i < nmatches; i++ ) + free ( matches[i] ); + + free ( matches ); + matches = NULL; + } + + if ( preg_match ( "host\\s*=\\s*\"([^\"]+)\"", match, 
&matches, &nmatches ) > 0 ) + { + strncpy ( config->outdbhost, matches[0], sizeof ( config->outdbhost )); + + for ( i=0; i < nmatches; i++ ) + free ( matches[i] ); + + free ( matches ); + matches = NULL; + } + + free ( match ); + + if ( !strlen ( config->outdbhost ) || !strlen ( config->outdbname ) || !strlen ( config->outdbpass ) || !strlen ( config->outdbuser )) + { + _dpd.fatalMsg ( "AIPreproc: Output database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' options must be used)\n" ); + } + + _dpd.logMsg(" Saving output alerts to the database %s\n", config->outdbname ); + } + + /* Parsing cluster options */ while ( preg_match ( "\\s*(cluster\\s*\\(\\s*)([^\\)]+)\\)", args, &matches, &nmatches ) > 0 ) { @@ -877,8 +1001,8 @@ static AI_config * AI_parse(char *args) #ifdef HAVE_DB alertparser_thread = AI_db_alertparser_thread; #else - _dpd.fatalMsg ( "AIPreproc: database logging enabled in config file, but the module was not compiled " - "with database support (recompile, i.e., with ./configure --with-mysql or --with-postgresql)\n" ); + _dpd.fatalMsg ( "AIPreproc: database logging enabled in config file, but the module was not compiled " + "with database support (recompile, i.e., with ./configure --with-mysql or --with-postgresql)\n" ); #endif } else if ( has_alertfile ) { alertparser_thread = AI_file_alertparser_thread; @@ -924,7 +1048,7 @@ static AI_config * AI_parse(char *args) } } - _dpd.logMsg ( "Using correlation rules from directory %s\n", config->corr_rules_dir ); + _dpd.logMsg ( " Using correlation rules from directory %s\n", config->corr_rules_dir ); if ( ! 
has_corr_alerts_dir ) { @@ -941,7 +1065,7 @@ static AI_config * AI_parse(char *args) config->alert_bufsize = DEFAULT_ALERT_BUFSIZE; } - _dpd.logMsg ( "Saving correlated alerts information in %s\n", config->corr_alerts_dir ); + _dpd.logMsg ( " Saving correlated alerts information in %s\n", config->corr_alerts_dir ); if ( has_database_log ) { diff --git a/spp_ai.h b/spp_ai.h index 26ce59e..5aac24f 100644 --- a/spp_ai.h +++ b/spp_ai.h @@ -75,6 +75,9 @@ /** Default interval of validity in seconds for an entry in the cache of correlated alerts */ #define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600 +/** Default maximum interval, in seconds, between two alerts for being considered in the same cluster */ +#define DEFAULT_CLUSTER_MAX_ALERT_INTERVAL 14400 + /** Cutoff y value in the exponential decay for considering two alerts not correlated */ #define CUTOFF_Y_VALUE 0.01 @@ -155,6 +158,9 @@ typedef struct /** Interval in seconds between two alerts (a,b) for considering them correlated */ unsigned long bayesianCorrelationInterval; + /** Default maximum interval, in seconds, between two alerts for being considered in the same cluster */ + unsigned long clusterMaxAlertInterval; + /** Interval in seconds for which an entry in the cache of correlated alerts is valid */ unsigned long bayesianCorrelationCacheValidity; @@ -197,6 +203,26 @@ typedef struct /** Database host, if database logging is used */ char dbhost[256]; + + /** Output database type, if clustered alerts and + * correlations are saved to a database as well */ + enum { mysql, postgresql } outdbtype; + + /** Output database name, if clustered alerts and + * correlations are saved to a database as well */ + char outdbname[256]; + + /** Output database user, if clustered alerts and + * correlations are saved to a database as well */ + char outdbuser[256]; + + /** Output database password, if clustered alerts and + * correlations are saved to a database as well */ + char outdbpass[256]; + + /** Output database host, 
if clustered alerts and + * correlations are saved to a database as well */ + char outdbhost[256]; } AI_config; /*****************************************************************/ /** Data type for hierarchies used for clustering */