Now supporting clustering with time constraints

This commit is contained in:
BlackLight 2010-09-29 12:24:30 +02:00
parent c022edc637
commit a454d15d29
7 changed files with 181 additions and 26 deletions

6
README
View file

@ -160,6 +160,7 @@ preprocessor ai: \
correlation_threshold_coefficient 0.5 \ correlation_threshold_coefficient 0.5 \
database ( type="mysql", name="snort", user="snortusr", password="snortpass", host="dbhost" ) \ database ( type="mysql", name="snort", user="snortusr", password="snortpass", host="dbhost" ) \
database_parsing_interval 30 \ database_parsing_interval 30 \
cluster_max_alert_interval 14400 \
clusterfile "/your/snort/dir/log/clustered_alerts" \ clusterfile "/your/snort/dir/log/clustered_alerts" \
cluster ( class="dst_port", name="privileged_ports", range="1-1023" ) \ cluster ( class="dst_port", name="privileged_ports", range="1-1023" ) \
cluster ( class="dst_port", name="unprivileged_ports", range="1024-65535" ) \ cluster ( class="dst_port", name="unprivileged_ports", range="1024-65535" ) \
@ -253,6 +254,11 @@ the alerts from database and the next one (default if not specified: 30 seconds)
- clusterfile: File where the clustered alerts will be saved by the module - clusterfile: File where the clustered alerts will be saved by the module
(default if not specified: /var/log/snort/clustered_alerts) (default if not specified: /var/log/snort/clustered_alerts)
- cluster_max_alert_interval: Maximum time interval, in seconds, that may elapse
between two alerts for them to be considered part of the same cluster (default:
14400 seconds, i.e. 4 hours). Specify 0 for this option if you want to
cluster alerts regardless of how much time has elapsed between them
- cluster: Clustering hierarchy or list of hierarchies to be applied for - cluster: Clustering hierarchy or list of hierarchies to be applied for
grouping similar alerts. This option needs to specify: grouping similar alerts. This option needs to specify:
-- class: Class of the cluster node. It may be src_addr, dst_addr, src_port -- class: Class of the cluster node. It may be src_addr, dst_addr, src_port

5
TODO
View file

@ -2,12 +2,12 @@
AVERAGE/HIGH PRIORITY: AVERAGE/HIGH PRIORITY:
====================== ======================
- Clustering alerts with time constraints - Save clusters and correlations to db
- Web interface - Web interface
- Code profiling - Code profiling
- Comment all the code!!!
- Saving packet flows as .pcap - Saving packet flows as .pcap
- Neural network for computing k - Neural network for computing k
- Isolating independent subgraphs from hyperalert correlation graphs
- Testing more scenarios, making more hyperalert models - Testing more scenarios, making more hyperalert models
============= =============
@ -30,4 +30,5 @@ DONE:
+ Add alerts' history serialization to db.c as well + Add alerts' history serialization to db.c as well
+ Bayesian learning among alerts in alert log + Bayesian learning among alerts in alert log
+ Split bayesian correlation out of correlation.c + Split bayesian correlation out of correlation.c
+ Clustering alerts with time constraints

View file

@ -19,6 +19,7 @@
#include "spp_ai.h" #include "spp_ai.h"
#include <stdio.h>
#include <sys/stat.h> #include <sys/stat.h>
/** \defgroup alert_history Manage the serialization and deserialization of alert history to the history file /** \defgroup alert_history Manage the serialization and deserialization of alert history to the history file

View file

@ -159,7 +159,6 @@ AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b )
found->latest_computation_time = time ( NULL ); found->latest_computation_time = time ( NULL );
} }
/* _dpd.logMsg ( "Correlation ('%s') -> ('%s'): %f\\n", a->desc, b->desc, corr ); */
return corr; return corr;
} /* ----- end of function AI_alert_bayesian_correlation ----- */ } /* ----- end of function AI_alert_bayesian_correlation ----- */

View file

@ -213,7 +213,7 @@ _AI_get_min_hierarchy_node ( int val, hierarchy_node *root )
*/ */
PRIVATE BOOL PRIVATE BOOL
_AI_equal_alarms ( AI_snort_alert *a1, AI_snort_alert *a2 ) _AI_equal_alerts ( AI_snort_alert *a1, AI_snort_alert *a2 )
{ {
if ( a1->gid != a2->gid || a1->sid != a2->sid || a1->rev != a2->rev ) if ( a1->gid != a2->gid || a1->sid != a2->sid || a1->rev != a2->rev )
{ {
@ -249,7 +249,7 @@ _AI_equal_alarms ( AI_snort_alert *a1, AI_snort_alert *a2 )
} }
return true; return true;
} /* ----- end of function _AI_equal_alarms ----- */ } /* ----- end of function _AI_equal_alerts ----- */
/** /**
@ -270,17 +270,14 @@ _AI_merge_alerts ( AI_snort_alert **log )
{ {
if ( tmp2->next ) if ( tmp2->next )
{ {
if ( !( /* If the two alerts are in the same clustering time window (if a time window was defined...) */
tmp->gid == tmp2->next->gid && if ( config->clusterMaxAlertInterval == 0 ||
tmp->sid == tmp2->next->sid && ( config->clusterMaxAlertInterval > 0 && abs ( tmp->timestamp - tmp2->next->timestamp ) <= config->clusterMaxAlertInterval ))
tmp->rev == tmp2->next->rev &&
tmp->timestamp == tmp2->next->timestamp &&
tmp->ip_src_addr == tmp2->next->ip_src_addr &&
tmp->ip_dst_addr == tmp2->next->ip_dst_addr &&
tmp->tcp_src_port == tmp2->next->tcp_src_port &&
tmp->tcp_dst_port == tmp2->next->tcp_dst_port ))
{ {
if ( _AI_equal_alarms ( tmp, tmp2->next )) if ( tmp != tmp2->next )
{
/* If the two alerts are equal... */
if ( _AI_equal_alerts ( tmp, tmp2->next ))
{ {
if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* )))) if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* ))))
_dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ ); _dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ );
@ -292,6 +289,7 @@ _AI_merge_alerts ( AI_snort_alert **log )
tmp2->next = tmp3; tmp2->next = tmp3;
} }
} }
}
tmp2 = tmp2->next; tmp2 = tmp2->next;
} else } else

126
spp_ai.c
View file

@ -166,6 +166,7 @@ static AI_config * AI_parse(char *args)
bayesian_correlation_interval = 0, bayesian_correlation_interval = 0,
bayesian_correlation_cache_validity = 0, bayesian_correlation_cache_validity = 0,
clusterfile_len = 0, clusterfile_len = 0,
cluster_max_alert_interval = 0,
corr_rules_dir_len = 0, corr_rules_dir_len = 0,
corr_alerts_dir_len = 0, corr_alerts_dir_len = 0,
alert_clustering_interval = 0, alert_clustering_interval = 0,
@ -182,6 +183,7 @@ static AI_config * AI_parse(char *args)
has_corr_rules_dir = false, has_corr_rules_dir = false,
has_clustering = false, has_clustering = false,
has_database_log = false, has_database_log = false,
has_database_output = false,
has_alert_history_file = false; has_alert_history_file = false;
if ( !( config = ( AI_config* ) malloc ( sizeof( AI_config )) )) if ( !( config = ( AI_config* ) malloc ( sizeof( AI_config )) ))
@ -388,6 +390,27 @@ static AI_config * AI_parse(char *args)
_dpd.logMsg( " Bayesian cache validity interval: %u\n", config->bayesianCorrelationCacheValidity ); _dpd.logMsg( " Bayesian cache validity interval: %u\n", config->bayesianCorrelationCacheValidity );
/* Parsing the cluster_max_alert_interval option */
if (( arg = (char*) strcasestr( args, "cluster_max_alert_interval" ) ))
{
for ( arg += strlen("cluster_max_alert_interval");
*arg && (*arg < '0' || *arg > '9');
arg++ );
if ( !(*arg) )
{
_dpd.fatalMsg("AIPreproc: cluster_max_alert_interval option used but "
"no value specified\n");
}
cluster_max_alert_interval = strtoul ( arg, NULL, 10 );
} else {
cluster_max_alert_interval = DEFAULT_CLUSTER_MAX_ALERT_INTERVAL;
}
config->clusterMaxAlertInterval = cluster_max_alert_interval;
_dpd.logMsg( " Cluster alert max interval: %u\n", config->clusterMaxAlertInterval );
/* Parsing the alertfile option */ /* Parsing the alertfile option */
if (( arg = (char*) strcasestr( args, "alertfile" ) )) if (( arg = (char*) strcasestr( args, "alertfile" ) ))
{ {
@ -550,7 +573,7 @@ static AI_config * AI_parse(char *args)
} }
/* Parsing database option */ /* Parsing database option */
if ( preg_match ( "\\s*database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 ) if ( preg_match ( "\\s+database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
{ {
if ( ! has_database_log ) if ( ! has_database_log )
has_database_log = true; has_database_log = true;
@ -627,8 +650,109 @@ static AI_config * AI_parse(char *args)
{ {
_dpd.fatalMsg ( "AIPreproc: Database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' option must be used)\n" ); _dpd.fatalMsg ( "AIPreproc: Database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' option must be used)\n" );
} }
_dpd.logMsg(" Reading alerts from the database %s\n", config->dbname );
} }
/* Parsing output_database option */
if ( preg_match ( "\\s*output_database\\s*\\(\\s*([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
{
if ( ! has_database_output )
has_database_output = true;
match = strdup ( matches[0] );
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
if ( preg_match ( "type\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
if ( strcasecmp ( matches[0], "mysql" ) && strcasecmp ( matches[0], "postgresql" ))
{
_dpd.fatalMsg ( "AIPreproc: Not supported database '%s' (supported types: mysql, postgresql)\n", matches[0] );
}
if ( !strcasecmp ( matches[0], "mysql" ))
{
#ifndef HAVE_LIBMYSQLCLIENT
_dpd.fatalMsg ( "AIPreproc: mysql output set in 'output_database' option but the module was not compiled through --with-mysql option\n" );
#else
config->outdbtype = mysql;
#endif
} else if ( !strcasecmp ( matches[0], "postgresql" )) {
#ifndef HAVE_LIBPQ
_dpd.fatalMsg ( "AIPreproc: postgresql output set in 'output_database' option but the module was not compiled through --with-postgresql option\n" );
#else
config->outdbtype = postgresql;
#endif
}
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "name\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbname, matches[0], sizeof ( config->outdbname ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "user\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbuser, matches[0], sizeof ( config->outdbuser ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "password\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbpass, matches[0], sizeof ( config->outdbpass ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
if ( preg_match ( "host\\s*=\\s*\"([^\"]+)\"", match, &matches, &nmatches ) > 0 )
{
strncpy ( config->outdbhost, matches[0], sizeof ( config->outdbhost ));
for ( i=0; i < nmatches; i++ )
free ( matches[i] );
free ( matches );
matches = NULL;
}
free ( match );
if ( !strlen ( config->outdbhost ) || !strlen ( config->outdbname ) || !strlen ( config->outdbpass ) || !strlen ( config->outdbuser ))
{
_dpd.fatalMsg ( "AIPreproc: Output database option used in config, but missing configuration option (all 'host', 'type', 'name', 'user', and 'password' options must be used)\n" );
}
_dpd.logMsg(" Saving output alerts to the database %s\n", config->outdbname );
}
/* Parsing cluster options */ /* Parsing cluster options */
while ( preg_match ( "\\s*(cluster\\s*\\(\\s*)([^\\)]+)\\)", args, &matches, &nmatches ) > 0 ) while ( preg_match ( "\\s*(cluster\\s*\\(\\s*)([^\\)]+)\\)", args, &matches, &nmatches ) > 0 )
{ {

View file

@ -75,6 +75,9 @@
/** Default interval of validity in seconds for an entry in the cache of correlated alerts */ /** Default interval of validity in seconds for an entry in the cache of correlated alerts */
#define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600 #define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600
/** Default maximum interval, in seconds, between two alerts for being considered in the same cluster */
#define DEFAULT_CLUSTER_MAX_ALERT_INTERVAL 14400
/** Cutoff y value in the exponential decay for considering two alerts not correlated */ /** Cutoff y value in the exponential decay for considering two alerts not correlated */
#define CUTOFF_Y_VALUE 0.01 #define CUTOFF_Y_VALUE 0.01
@ -155,6 +158,9 @@ typedef struct
/** Interval in seconds between two alerts (a,b) for considering them correlated */ /** Interval in seconds between two alerts (a,b) for considering them correlated */
unsigned long bayesianCorrelationInterval; unsigned long bayesianCorrelationInterval;
/** Maximum interval, in seconds, between two alerts for being considered in the same cluster (0 disables the time constraint) */
unsigned long clusterMaxAlertInterval;
/** Interval in seconds for which an entry in the cache of correlated alerts is valid */ /** Interval in seconds for which an entry in the cache of correlated alerts is valid */
unsigned long bayesianCorrelationCacheValidity; unsigned long bayesianCorrelationCacheValidity;
@ -197,6 +203,26 @@ typedef struct
/** Database host, if database logging is used */ /** Database host, if database logging is used */
char dbhost[256]; char dbhost[256];
/** Output database type, if clustered alerts and
* correlations are saved to a database as well */
enum { mysql, postgresql } outdbtype;
/** Output database name, if clustered alerts and
* correlations are saved to a database as well */
char outdbname[256];
/** Output database user, if clustered alerts and
* correlations are saved to a database as well */
char outdbuser[256];
/** Output database password, if clustered alerts and
* correlations are saved to a database as well */
char outdbpass[256];
/** Output database host, if clustered alerts and
* correlations are saved to a database as well */
char outdbhost[256];
} AI_config; } AI_config;
/*****************************************************************/ /*****************************************************************/
/** Data type for hierarchies used for clustering */ /** Data type for hierarchies used for clustering */