Improved clustering: cluster_min_size is now derived dynamically from the heterogeneity of the alert set

BlackLight 2010-09-18 16:42:11 +02:00
parent 5b471d9003
commit efa5aa1ded
2 changed files with 81 additions and 17 deletions

TODO

@@ -2,7 +2,6 @@
 AVERAGE/HIGH PRIORITY:
 ======================
-- Dynamic cluster_min_size algorithm
 - Dynamic k parameter in correlation threshold
 - Testing more scenarios, making more hyperalert models
 - Bayesian learning among alerts in alert log
@@ -22,4 +21,5 @@ DONE:
 + Regex comp cache
 + Managing hyperalert graph connection inside the alert structure itself
 + Keeping track of all the streams and alerts even after clustered
++ Dynamic cluster_min_size algorithm


@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <unistd.h>
+#include <math.h>
 #include <limits.h>
 #include <pthread.h>
@@ -41,6 +42,13 @@ typedef struct {
     UT_hash_handle hh;
 } attribute_value;
+
+/** Structure containing the count of occurrences of the single alerts in the log */
+typedef struct {
+    AI_hyperalert_key key;
+    unsigned int      count;
+    UT_hash_handle    hh;
+} AI_alert_occurrence;

 PRIVATE hierarchy_node *h_root[CLUSTER_TYPES] = { NULL };
 PRIVATE AI_config      *_config               = NULL;
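For reference, AI_alert_occurrence follows the usual uthash pattern of keying a hash table on a whole struct. Below is a minimal sketch of that pattern, not taken from the commit (the type and function names here are hypothetical); zeroing the key with memset before filling it is the precaution the uthash documentation suggests when a struct key could contain padding bytes:

    #include <stdlib.h>
    #include <string.h>
    #include "uthash.h"

    /* Hypothetical (gid, sid, rev) key, mirroring the role of AI_hyperalert_key */
    typedef struct { unsigned int gid, sid, rev; } occ_key;

    typedef struct {
        occ_key        key;     /* the whole struct is the hash key */
        unsigned int   count;
        UT_hash_handle hh;
    } occ_entry;

    static occ_entry *occ_table = NULL;

    /* Count one occurrence of (gid, sid, rev), inserting the entry on first sight */
    static void occ_bump ( unsigned int gid, unsigned int sid, unsigned int rev )
    {
        occ_key   k;
        occ_entry *e = NULL;

        memset ( &k, 0, sizeof ( k ));    /* uthash hashes the raw key bytes, padding included */
        k.gid = gid;
        k.sid = sid;
        k.rev = rev;

        HASH_FIND ( hh, occ_table, &k, sizeof ( occ_key ), e );

        if ( !e )
        {
            if ( !( e = (occ_entry*) calloc ( 1, sizeof ( occ_entry ))))
                return;                   /* out of memory: the preprocessor would bail out here */

            e->key = k;
            HASH_ADD ( hh, occ_table, key, sizeof ( occ_key ), e );
        }

        e->count++;                       /* new entries end up with count == 1 */
    }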
@@ -266,6 +274,8 @@ _AI_merge_alerts ( AI_snort_alert **log )
     {
         if ( tmp != tmp2->next )
         {
+            _dpd.logMsg ( "Comparing '%s' and '%s'...\n", tmp->desc, tmp2->next->desc );
+
             if ( _AI_equal_alarms ( tmp, tmp2->next ))
             {
                 if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* ))))
@@ -273,9 +283,9 @@ _AI_merge_alerts ( AI_snort_alert **log )
                 tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next;
                 count++;
+                _dpd.logMsg ( " -> Grouping '%s' and '%s'\n", tmp->desc, tmp2->next->desc );

                 tmp3 = tmp2->next->next;
-                /* free ( tmp2->next ); */
                 tmp2->next = tmp3;
             }
         }
@@ -286,9 +296,67 @@ _AI_merge_alerts ( AI_snort_alert **log )
         }
     }

+    _dpd.logMsg ( "\n" );
+
     return count;
 } /* ----- end of function _AI_merge_alerts ----- */

+/**
+ * \brief  Get the average heterogeneity coefficient of the set of alerts
+ * \return The average heterogeneity coefficient of the set of alerts
+ */
+double
+_AI_get_alerts_heterogeneity ( int *alert_count )
+{
+    double              heterogeneity  = 0.0;
+    int                 distinct_count = 0;
+    AI_hyperalert_key   key;
+    AI_snort_alert      *alert_iterator = NULL;
+    AI_alert_occurrence *table = NULL,
+                        *found = NULL;
+
+    *alert_count = 0;
+
+    for ( alert_iterator = alert_log; alert_iterator; alert_iterator = alert_iterator->next )
+    {
+        found = NULL;
+        *alert_count += alert_iterator->grouped_alerts_count;
+
+        key.gid = alert_iterator->gid;
+        key.sid = alert_iterator->sid;
+        key.rev = alert_iterator->rev;
+        HASH_FIND ( hh, table, &key, sizeof ( AI_hyperalert_key ), found );
+
+        if ( !found )
+        {
+            if ( !( found = (AI_alert_occurrence*) malloc ( sizeof ( AI_alert_occurrence ))))
+                _dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ );
+
+            found->key   = key;
+            found->count = 1;
+            HASH_ADD ( hh, table, key, sizeof ( AI_hyperalert_key ), found );
+        } else {
+            found->count++;
+        }
+    }
+
+    for ( found = table; found; found = (AI_alert_occurrence*) found->hh.next )
+        distinct_count++;
+
+    if ( *alert_count > 0 )
+        heterogeneity = (double) distinct_count / (double) *alert_count;
+    else
+        heterogeneity = 0.0;
+
+    while ( table )
+    {
+        found = table;
+        HASH_DEL ( table, found );
+        free ( found );
+    }
+
+    return heterogeneity;
+} /* ----- end of function _AI_get_alerts_heterogeneity ----- */
+
 /**
  * \brief Print the clustered alerts to a log file
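The new _AI_get_alerts_heterogeneity() boils down to the ratio of distinct alert signatures (gid, sid, rev) to the total number of alerts, counting the ones already grouped under each log entry. The following standalone sketch, not part of the commit, computes the same ratio over a made-up alert list; it skips uthash and the grouped-alert weighting to stay short:

    #include <stdio.h>

    /* Toy stand-in for a Snort (gid, sid, rev) signature */
    typedef struct { unsigned int gid, sid, rev; } toy_sig;

    /* Heterogeneity as in the function above: distinct signatures / total alerts */
    static double toy_heterogeneity ( const toy_sig *alerts, int n )
    {
        int distinct = 0, i, j;

        for ( i = 0; i < n; i++ )
        {
            for ( j = 0; j < i; j++ )
                if ( alerts[j].gid == alerts[i].gid &&
                     alerts[j].sid == alerts[i].sid &&
                     alerts[j].rev == alerts[i].rev )
                    break;

            if ( j == i )           /* first time this signature is seen */
                distinct++;
        }

        return ( n > 0 ) ? (double) distinct / (double) n : 0.0;
    }

    int main ( void )
    {
        /* 6 alerts, 2 distinct signatures -> heterogeneity = 2/6 = 0.33 */
        toy_sig alerts[] = { {1,2000,1}, {1,2000,1}, {1,2000,1},
                             {1,3000,2}, {1,3000,2}, {1,3000,2} };

        printf ( "%.2f\n", toy_heterogeneity ( alerts, (int)( sizeof ( alerts ) / sizeof ( alerts[0] ))));
        return 0;
    }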
@@ -361,16 +429,17 @@ _AI_cluster_thread ( void* arg )
     hierarchy_node *node, *child;
     cluster_type   type;
     cluster_type   best_type;
-    BOOL           has_small_clusters = true;
     FILE           *cluster_fp;
     char           label[256];
     int            hostval;
     int            netval;
     int            minval;
     int            heuristic_val;
-    int            cluster_min_size = 2;
+    int            cluster_min_size = 1;
     int            alert_count = 0;
     int            old_alert_count = 0;
+    int            single_alerts_count = 0;
+    double         heterogeneity = 0;

     while ( 1 )
     {
@@ -393,8 +462,6 @@ _AI_cluster_thread ( void* arg )
             continue;
         }

-        has_small_clusters = true;
-
         for ( tmp = alert_log, alert_count=0; tmp; tmp = tmp->next, alert_count++ )
         {
             /* If an alert has an uninitialized "grouped alarms count", set its counter to 1 (it only groups the current alert) */
@@ -403,12 +470,6 @@ _AI_cluster_thread ( void* arg )
                 tmp->grouped_alerts_count = 1;
             }

-            /* If the current alarm already groups at least min_size alarms, then there is no need for further clustering */
-            if ( tmp->grouped_alerts_count >= cluster_min_size )
-            {
-                has_small_clusters = false;
-            }
-
             /* Initialize the clustering hierarchies in the current alert */
             for ( type=0; type < CLUSTER_TYPES; type++ )
             {
@@ -454,8 +515,14 @@ _AI_cluster_thread ( void* arg )
         }

         alert_count -= _AI_merge_alerts ( &alert_log );
+        heterogeneity = _AI_get_alerts_heterogeneity( &single_alerts_count );
+
+        /* Get the minimum size for the clusters as a function of the heterogeneity of the alerts' set */
+        if ( heterogeneity > 0 )
+            cluster_min_size = (int) round ( 1/heterogeneity );
+        else
+            cluster_min_size = 1;

-        /* while ( has_small_clusters && alert_count > cluster_min_size ) */
         do
         {
             old_alert_count = alert_count;
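The mapping added above is cluster_min_size = round(1/heterogeneity): the more repetitive the alert log (low heterogeneity), the larger a cluster has to grow before further clustering is considered unnecessary. A quick standalone illustration, not part of the commit, of the values this produces (compile with -lm for round()):

    #include <math.h>
    #include <stdio.h>

    int main ( void )
    {
        /* Example heterogeneity values and the minimum cluster size they imply */
        double h[] = { 1.0, 0.5, 0.25, 0.1 };
        int    i;

        for ( i = 0; i < 4; i++ )
            printf ( "heterogeneity=%.2f -> cluster_min_size=%d\n",
                     h[i], ( h[i] > 0 ) ? (int) round ( 1 / h[i] ) : 1 );

        return 0;    /* prints 1, 2, 4 and 10 respectively */
    }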
@@ -488,9 +555,6 @@ _AI_cluster_thread ( void* arg )
             }

             alert_count -= _AI_merge_alerts ( &alert_log );
-
-            /* if ( old_alert_count == alert_count ) */
-            /* break; */
         } while ( old_alert_count != alert_count );

         lock_flag = false;
@@ -664,7 +728,7 @@ _AI_copy_clustered_alerts ( AI_snort_alert *node )
 AI_snort_alert*
 AI_get_clustered_alerts ()
 {
-    while ( lock_flag );
+    for ( ; lock_flag; usleep(100) );
     return _AI_copy_clustered_alerts ( alert_log );
 } /* ----- end of function AI_get_clustered_alerts ----- */
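The last hunk replaces a pure busy-wait on lock_flag with a polling loop that sleeps 100 microseconds per iteration, so the caller no longer burns a full CPU core while the clustering thread rewrites the alert list. A minimal sketch of the same pattern, not the project's code: it assumes lock_flag is a plain flag shared between threads and declares it volatile (a C11 atomic would be the more rigorous choice) so the compiler cannot hoist the load out of the loop:

    #include <stdbool.h>
    #include <unistd.h>

    /* Stand-in for the preprocessor's lock_flag, raised while the clustering
     * thread is rewriting the alert list */
    static volatile bool lock_flag = false;

    /* Wait for the clustering pass to finish, sleeping 100 microseconds per poll
     * instead of spinning at full speed as the old `while ( lock_flag );` did */
    static void wait_for_cluster_lock ( void )
    {
        while ( lock_flag )
            usleep ( 100 );
    }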