Now supporting clustering with time constraints

This commit is contained in:
BlackLight 2010-09-29 12:24:30 +02:00
parent c022edc637
commit a454d15d29
7 changed files with 181 additions and 26 deletions

6
README
View file

@ -160,6 +160,7 @@ preprocessor ai: \
correlation_threshold_coefficient 0.5 \
database ( type="mysql", name="snort", user="snortusr", password="snortpass", host="dbhost" ) \
database_parsing_interval 30 \
cluster_max_alert_interval 14400 \
clusterfile "/your/snort/dir/log/clustered_alerts" \
cluster ( class="dst_port", name="privileged_ports", range="1-1023" ) \
cluster ( class="dst_port", name="unprivileged_ports", range="1024-65535" ) \
@ -253,6 +254,11 @@ the alerts from database and the next one (default if not specified: 30 seconds)
- clusterfile: File where the clustered alerts will be saved by the module
(default if not specified: /var/log/snort/clustered_alerts)
- cluster_max_alert_interval: Maximum time interval, in seconds, that may elapse
between two alerts for them to be considered part of the same cluster (default:
14400 seconds, i.e. 4 hours). Specify 0 for this option if you want to
cluster alerts regardless of how much time has elapsed between them
- cluster: Clustering hierarchy or list of hierarchies to be applied for
grouping similar alerts. This option needs to specify:
-- class: Class of the cluster node. It may be src_addr, dst_addr, src_port

5
TODO
View file

@ -2,12 +2,12 @@
AVERAGE/HIGH PRIORITY:
======================
- Clustering alerts with time constraints
- Save clusters and correlations to db
- Web interface
- Code profiling
- Comment all the code!!!
- Saving packet flows as .pcap
- Neural network for computing k
- Isolating independent subgraphs from hyperalert correlation graphs
- Testing more scenarios, making more hyperalert models
=============
@ -30,4 +30,5 @@ DONE:
+ Add alerts' history serialization to db.c as well
+ Bayesian learning among alerts in alert log
+ Split bayesian correlation out of correlation.c
+ Clustering alerts with time constraints

View file

@ -19,6 +19,7 @@
#include "spp_ai.h"
#include <stdio.h>
#include <sys/stat.h>
/** \defgroup alert_history Manage the serialization and deserialization of alert history to the history file

View file

@ -159,7 +159,6 @@ AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b )
found->latest_computation_time = time ( NULL );
}
/* _dpd.logMsg ( "Correlation ('%s') -> ('%s'): %f\\n", a->desc, b->desc, corr ); */
return corr;
} /* ----- end of function AI_alert_bayesian_correlation ----- */

View file

@ -213,7 +213,7 @@ _AI_get_min_hierarchy_node ( int val, hierarchy_node *root )
*/
PRIVATE BOOL
_AI_equal_alarms ( AI_snort_alert *a1, AI_snort_alert *a2 )
_AI_equal_alerts ( AI_snort_alert *a1, AI_snort_alert *a2 )
{
if ( a1->gid != a2->gid || a1->sid != a2->sid || a1->rev != a2->rev )
{
@ -249,7 +249,7 @@ _AI_equal_alarms ( AI_snort_alert *a1, AI_snort_alert *a2 )
}
return true;
} /* ----- end of function _AI_equal_alarms ----- */
} /* ----- end of function _AI_equal_alerts ----- */
/**
@ -270,26 +270,24 @@ _AI_merge_alerts ( AI_snort_alert **log )
{
if ( tmp2->next )
{
if ( !(
tmp->gid == tmp2->next->gid &&
tmp->sid == tmp2->next->sid &&
tmp->rev == tmp2->next->rev &&
tmp->timestamp == tmp2->next->timestamp &&
tmp->ip_src_addr == tmp2->next->ip_src_addr &&
tmp->ip_dst_addr == tmp2->next->ip_dst_addr &&
tmp->tcp_src_port == tmp2->next->tcp_src_port &&
tmp->tcp_dst_port == tmp2->next->tcp_dst_port ))
/* If the two alerts are in the same clustering time window (if a time window was defined...) */
if ( config->clusterMaxAlertInterval == 0 ||
( config->clusterMaxAlertInterval > 0 && abs ( tmp->timestamp - tmp2->next->timestamp ) <= config->clusterMaxAlertInterval ))
{
if ( _AI_equal_alarms ( tmp, tmp2->next ))
if ( tmp != tmp2->next )
{
if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* ))))
_dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ );
/* If the two alerts are equal... */
if ( _AI_equal_alerts ( tmp, tmp2->next ))
{
if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* ))))
_dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ );
tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next;
count++;
tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next;
count++;
tmp3 = tmp2->next->next;
tmp2->next = tmp3;
tmp3 = tmp2->next->next;
tmp2->next = tmp3;
}
}
}

134
spp_ai.c
View file

@ -166,6 +166,7 @@ static AI_config * AI_parse(char *args)
bayesian_correlation_interval = 0,
bayesian_correlation_cache_validity = 0,
clusterfile_len = 0,
cluster_max_alert_interval = 0,
corr_rules_dir_len = 0,
corr_alerts_dir_len = 0,
alert_clustering_interval = 0,
@ -182,6 +183,7 @@ static AI_config * AI_parse(char *args)
has_corr_rules_dir = false,
has_clustering = false,
has_database_log = false,
has_database_output = false,
has_alert_history_file = false;
if ( !( config = ( AI_config* ) malloc ( sizeof( AI_config )) ))
@ -388,6 +390,27 @@ static AI_config * AI_parse(char *args)
_dpd.logMsg( " Bayesian cache validity interval: %u\n", config->bayesianCorrelationCacheValidity );
/* Parsing the cluster_max_alert_interval option */
if (( arg = (char*) strcasestr( args, "cluster_max_alert_interval" ) ))
{
for ( arg += strlen("cluster_max_alert_interval");
*arg && (*arg < '0' || *arg > '9');
arg++ );
if ( !(*arg) )
{
_dpd.fatalMsg("AIPreproc: cluster_max_alert_interval option used but "
"no value specified\n");
}
cluster_max_alert_interval = strtoul ( arg, NULL, 10 );
} else {
cluster_max_alert_interval = DEFAULT_CLUSTER_MAX_ALERT_INTERVAL;
}
config->clusterMaxAlertInterval = cluster_max_alert_interval;
_dpd.logMsg( " Cluster alert max interval: %u\n", config->clusterMaxAlertInterval );
/* Parsing the alertfile option */
if (( arg = (char*) strcasestr( args, "alertfile" ) ))
{
@ -550,7 +573,7 @@ static AI_config * AI_parse(char *args)
}
/* Parsing database option */
if ( preg_match ( "\\s*database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
if ( preg_match ( "\\s+database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
{
if ( ! has_database_log )
has_database_log = true;
@ -627,8 +650,109 @@ static AI_config * AI_parse(char *args)
{
_dpd.fatalMsg ( "AIPreproc: Database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' option must be used)\n" );
}
_dpd.logMsg(" Reading alerts from the database %s\n", config->dbname );
}
/* Parsing output_database option */
if ( preg_match ( "\\s*output_database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
{
if ( ! has_database_output )
has_database_output = true;
match = strdup ( matches[0] );
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
if ( preg_match ( "type\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
if ( strcasecmp ( matches[0], "mysql" ) && strcasecmp ( matches[0], "postgresql" ))
{
_dpd.fatalMsg ( "AIPreproc: Not supported database '%s' (supported types: mysql, postgresql)\n", matches[0] );
}
if ( !strcasecmp ( matches[0], "mysql" ))
{
#ifndef HAVE_LIBMYSQLCLIENT
_dpd.fatalMsg ( "AIPreproc: mysql output set in 'output_database' option but the module was not compiled through --with-mysql option\n" );
#else
config->outdbtype = mysql;
#endif
} else if ( !strcasecmp ( matches[0], "postgresql" )) {
#ifndef HAVE_LIBPQ
_dpd.fatalMsg ( "AIPreproc: postgresql output set in 'output_database' option but the module was not compiled through --with-postgresql option\n" );
#else
config->outdbtype = postgresql;
#endif
}
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "name\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbname, matches[0], sizeof ( config->outdbname ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "user\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbuser, matches[0], sizeof ( config->outdbuser ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "password\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbpass, matches[0], sizeof ( config->outdbpass ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "host\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbhost, matches[0], sizeof ( config->outdbhost ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
free ( match );
if ( !strlen ( config->outdbhost ) || !strlen ( config->outdbname ) || !strlen ( config->outdbpass ) || !strlen ( config->outdbuser ))
{
_dpd.fatalMsg ( "AIPreproc: Output database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' options must be used)\n" );
}
_dpd.logMsg(" Saving output alerts to the database %s\n", config->outdbname );
}
/* Parsing cluster options */
while ( preg_match ( "\\s*(cluster\\s*\\(\\s*)([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
{
@ -877,8 +1001,8 @@ static AI_config * AI_parse(char *args)
#ifdef HAVE_DB
alertparser_thread = AI_db_alertparser_thread;
#else
_dpd.fatalMsg ( "AIPreproc: database logging enabled in config file, but the module was not compiled "
"with database support (recompile, i.e., with ./configure --with-mysql or --with-postgresql)\n" );
_dpd.fatalMsg ( "AIPreproc: database logging enabled in config file, but the module was not compiled "
"with database support (recompile, i.e., with ./configure --with-mysql or --with-postgresql)\n" );
#endif
} else if ( has_alertfile ) {
alertparser_thread = AI_file_alertparser_thread;
@ -924,7 +1048,7 @@ static AI_config * AI_parse(char *args)
}
}
_dpd.logMsg ( "Using correlation rules from directory %s\n", config->corr_rules_dir );
_dpd.logMsg ( " Using correlation rules from directory %s\n", config->corr_rules_dir );
if ( ! has_corr_alerts_dir )
{
@ -941,7 +1065,7 @@ static AI_config * AI_parse(char *args)
config->alert_bufsize = DEFAULT_ALERT_BUFSIZE;
}
_dpd.logMsg ( "Saving correlated alerts information in %s\n", config->corr_alerts_dir );
_dpd.logMsg ( " Saving correlated alerts information in %s\n", config->corr_alerts_dir );
if ( has_database_log )
{

View file

@ -75,6 +75,9 @@
/** Default interval of validity in seconds for an entry in the cache of correlated alerts */
#define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600
/** Default maximum interval, in seconds, between two alerts for them to be considered part of the same cluster (14400 s = 4 hours) */
#define DEFAULT_CLUSTER_MAX_ALERT_INTERVAL 14400
/** Cutoff y value in the exponential decay for considering two alerts not correlated */
#define CUTOFF_Y_VALUE 0.01
@ -155,6 +158,9 @@ typedef struct
/** Interval in seconds between two alerts (a,b) for considering them correlated */
unsigned long bayesianCorrelationInterval;
/** Maximum interval, in seconds, between two alerts for them to be considered part of the same cluster (0 disables the time constraint) */
unsigned long clusterMaxAlertInterval;
/** Interval in seconds for which an entry in the cache of correlated alerts is valid */
unsigned long bayesianCorrelationCacheValidity;
@ -197,6 +203,26 @@ typedef struct
/** Database host, if database logging is used */
char dbhost[256];
/** Output database type, if clustered alerts and
* correlations are saved to a database as well */
enum { mysql, postgresql } outdbtype;
/** Output database name, if clustered alerts and
* correlations are saved to a database as well */
char outdbname[256];
/** Output database user, if clustered alerts and
* correlations are saved to a database as well */
char outdbuser[256];
/** Output database password, if clustered alerts and
* correlations are saved to a database as well */
char outdbpass[256];
/** Output database host, if clustered alerts and
* correlations are saved to a database as well */
char outdbhost[256];
} AI_config;
/*****************************************************************/
/** Data type for hierarchies used for clustering */