mirror of
https://github.com/BlackLight/Snort_AIPreproc.git
synced 2024-11-24 04:35:11 +01:00
Clustering now improved
This commit is contained in:
parent
5b471d9003
commit
efa5aa1ded
2 changed files with 81 additions and 17 deletions
2
TODO
2
TODO
|
@ -2,7 +2,6 @@
|
||||||
AVERAGE/HIGH PRIORITY:
|
AVERAGE/HIGH PRIORITY:
|
||||||
======================
|
======================
|
||||||
|
|
||||||
- Dynamic cluster_min_size algorithm
|
|
||||||
- Dynamic k parameter in correlation threshold
|
- Dynamic k parameter in correlation threshold
|
||||||
- Testing more scenarios, making more hyperalert models
|
- Testing more scenarios, making more hyperalert models
|
||||||
- Bayesian learning among alerts in alert log
|
- Bayesian learning among alerts in alert log
|
||||||
|
@ -22,4 +21,5 @@ DONE:
|
||||||
+ Regex comp cache
|
+ Regex comp cache
|
||||||
+ Managing hyperalert graph connection inside the alert structure itself
|
+ Managing hyperalert graph connection inside the alert structure itself
|
||||||
+ Keeping track of all the streams and alerts even after clustered
|
+ Keeping track of all the streams and alerts even after clustered
|
||||||
|
+ Dynamic cluster_min_size algorithm
|
||||||
|
|
||||||
|
|
96
cluster.c
96
cluster.c
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <math.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
|
||||||
|
@ -41,6 +42,13 @@ typedef struct {
|
||||||
UT_hash_handle hh;
|
UT_hash_handle hh;
|
||||||
} attribute_value;
|
} attribute_value;
|
||||||
|
|
||||||
|
/** Hash-table entry counting how many alerts in the log share one (gid, sid, rev) key; used by _AI_get_alerts_heterogeneity */
|
||||||
|
typedef struct {
|
||||||
|
/* Hash key: filled from the gid, sid and rev fields of an alert */
AI_hyperalert_key key;
|
||||||
|
/* Number of alert_log entries seen with this key (starts at 1 on insertion) */
unsigned int count;
|
||||||
|
/* uthash bookkeeping handle; referenced as `hh` by the HASH_* macros */
UT_hash_handle hh;
|
||||||
|
} AI_alert_occurrence;
|
||||||
|
|
||||||
|
|
||||||
PRIVATE hierarchy_node *h_root[CLUSTER_TYPES] = { NULL };
|
PRIVATE hierarchy_node *h_root[CLUSTER_TYPES] = { NULL };
|
||||||
PRIVATE AI_config *_config = NULL;
|
PRIVATE AI_config *_config = NULL;
|
||||||
|
@ -266,6 +274,8 @@ _AI_merge_alerts ( AI_snort_alert **log )
|
||||||
{
|
{
|
||||||
if ( tmp != tmp2->next )
|
if ( tmp != tmp2->next )
|
||||||
{
|
{
|
||||||
|
_dpd.logMsg ( "Comparing '%s' and '%s'...\n", tmp->desc, tmp2->next->desc );
|
||||||
|
|
||||||
if ( _AI_equal_alarms ( tmp, tmp2->next ))
|
if ( _AI_equal_alarms ( tmp, tmp2->next ))
|
||||||
{
|
{
|
||||||
if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* ))))
|
if ( !( tmp->grouped_alerts = ( AI_snort_alert** ) realloc ( tmp->grouped_alerts, (++(tmp->grouped_alerts_count)) * sizeof ( AI_snort_alert* ))))
|
||||||
|
@ -273,9 +283,9 @@ _AI_merge_alerts ( AI_snort_alert **log )
|
||||||
|
|
||||||
tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next;
|
tmp->grouped_alerts[ tmp->grouped_alerts_count - 1 ] = tmp2->next;
|
||||||
count++;
|
count++;
|
||||||
|
_dpd.logMsg ( " -> Grouping '%s' and '%s'\n", tmp->desc, tmp2->next->desc );
|
||||||
|
|
||||||
tmp3 = tmp2->next->next;
|
tmp3 = tmp2->next->next;
|
||||||
/* free ( tmp2->next ); */
|
|
||||||
tmp2->next = tmp3;
|
tmp2->next = tmp3;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -286,9 +296,67 @@ _AI_merge_alerts ( AI_snort_alert **log )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_dpd.logMsg ( "\n" );
|
||||||
return count;
|
return count;
|
||||||
} /* ----- end of function _AI_merge_alerts ----- */
|
} /* ----- end of function _AI_merge_alerts ----- */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Compute the heterogeneity coefficient of alert_log: distinct (gid, sid, rev) keys divided by the total number of grouped alerts
|
||||||
|
* \param alert_count Output: reset to 0, then set to the sum of grouped_alerts_count over every entry of alert_log
|
||||||
|
* \return distinct keys / *alert_count, or 0.0 when *alert_count is not greater than 0
|
||||||
|
*/
|
||||||
|
|
||||||
|
double
|
||||||
|
_AI_get_alerts_heterogeneity ( int *alert_count )
|
||||||
|
{
|
||||||
|
double heterogeneity = 0.0;
|
||||||
|
int distinct_count = 0;
|
||||||
|
|
||||||
|
/* NOTE(review): only gid/sid/rev are assigned below, but the hash macros are given sizeof(AI_hyperalert_key) bytes; */
/* if that struct has padding or further fields, those bytes are uninitialized here - confirm against its declaration */
AI_hyperalert_key key;
|
||||||
|
AI_snort_alert *alert_iterator = NULL;
|
||||||
|
|
||||||
|
/* table: head of a temporary uthash table local to this call; found: lookup result / iteration cursor */
AI_alert_occurrence *table = NULL,
|
||||||
|
*found = NULL;
|
||||||
|
/* Reset the caller's counter before accumulating into it */
*alert_count = 0;
|
||||||
|
|
||||||
|
/* Pass 1: walk the global alert list, totalling grouped alerts and counting occurrences per key */
for ( alert_iterator = alert_log; alert_iterator; alert_iterator = alert_iterator->next )
|
||||||
|
{
|
||||||
|
found = NULL;
|
||||||
|
*alert_count += alert_iterator->grouped_alerts_count;
|
||||||
|
key.gid = alert_iterator->gid;
|
||||||
|
key.sid = alert_iterator->sid;
|
||||||
|
key.rev = alert_iterator->rev;
|
||||||
|
HASH_FIND ( hh, table, &key, sizeof ( AI_hyperalert_key ), found );
|
||||||
|
|
||||||
|
/* First time this key is seen: allocate an entry and insert it; otherwise bump its counter */
if ( !found )
|
||||||
|
{
|
||||||
|
/* NOTE(review): the code below dereferences found unconditionally, so this presumably relies on fatalMsg not returning - confirm */
if ( !( found = (AI_alert_occurrence*) malloc ( sizeof ( AI_alert_occurrence ))))
|
||||||
|
_dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ );
|
||||||
|
|
||||||
|
found->key = key;
|
||||||
|
found->count = 1;
|
||||||
|
HASH_ADD ( hh, table, key, sizeof ( AI_hyperalert_key ), found );
|
||||||
|
} else {
|
||||||
|
/* The per-key count is maintained here but not read anywhere else in this function */
found->count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pass 2: the number of table entries is the number of distinct keys */
for ( found = table; found; found = (AI_alert_occurrence*) found->hh.next )
|
||||||
|
distinct_count++;
|
||||||
|
|
||||||
|
/* Guard the division: an empty log (or a non-positive total) yields 0.0 */
if ( *alert_count > 0 )
|
||||||
|
heterogeneity = (double) distinct_count / (double) *alert_count;
|
||||||
|
else
|
||||||
|
heterogeneity = 0.0;
|
||||||
|
|
||||||
|
/* Tear down the temporary table: unlink the current head and free it until the table is empty */
while ( table )
|
||||||
|
{
|
||||||
|
found = table;
|
||||||
|
HASH_DEL ( table, found );
|
||||||
|
free ( found );
|
||||||
|
}
|
||||||
|
|
||||||
|
return heterogeneity;
|
||||||
|
} /* ----- end of function _AI_get_alerts_heterogeneity ----- */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Print the clustered alerts to a log file
|
* \brief Print the clustered alerts to a log file
|
||||||
|
@ -361,16 +429,17 @@ _AI_cluster_thread ( void* arg )
|
||||||
hierarchy_node *node, *child;
|
hierarchy_node *node, *child;
|
||||||
cluster_type type;
|
cluster_type type;
|
||||||
cluster_type best_type;
|
cluster_type best_type;
|
||||||
BOOL has_small_clusters = true;
|
|
||||||
FILE *cluster_fp;
|
FILE *cluster_fp;
|
||||||
char label[256];
|
char label[256];
|
||||||
int hostval;
|
int hostval;
|
||||||
int netval;
|
int netval;
|
||||||
int minval;
|
int minval;
|
||||||
int heuristic_val;
|
int heuristic_val;
|
||||||
int cluster_min_size = 2;
|
int cluster_min_size = 1;
|
||||||
int alert_count = 0;
|
int alert_count = 0;
|
||||||
int old_alert_count = 0;
|
int old_alert_count = 0;
|
||||||
|
int single_alerts_count = 0;
|
||||||
|
double heterogeneity = 0;
|
||||||
|
|
||||||
while ( 1 )
|
while ( 1 )
|
||||||
{
|
{
|
||||||
|
@ -393,8 +462,6 @@ _AI_cluster_thread ( void* arg )
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
has_small_clusters = true;
|
|
||||||
|
|
||||||
for ( tmp = alert_log, alert_count=0; tmp; tmp = tmp->next, alert_count++ )
|
for ( tmp = alert_log, alert_count=0; tmp; tmp = tmp->next, alert_count++ )
|
||||||
{
|
{
|
||||||
/* If an alert has an uninitialized "grouped alarms count", set its counter to 1 (it only groups the current alert) */
|
/* If an alert has an uninitialized "grouped alarms count", set its counter to 1 (it only groups the current alert) */
|
||||||
|
@ -403,12 +470,6 @@ _AI_cluster_thread ( void* arg )
|
||||||
tmp->grouped_alerts_count = 1;
|
tmp->grouped_alerts_count = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the current alarm already group at least min_size alarms, then no need to do further clusterization */
|
|
||||||
if ( tmp->grouped_alerts_count >= cluster_min_size )
|
|
||||||
{
|
|
||||||
has_small_clusters = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize the clustering hierarchies in the current alert */
|
/* Initialize the clustering hierarchies in the current alert */
|
||||||
for ( type=0; type < CLUSTER_TYPES; type++ )
|
for ( type=0; type < CLUSTER_TYPES; type++ )
|
||||||
{
|
{
|
||||||
|
@ -454,8 +515,14 @@ _AI_cluster_thread ( void* arg )
|
||||||
}
|
}
|
||||||
|
|
||||||
alert_count -= _AI_merge_alerts ( &alert_log );
|
alert_count -= _AI_merge_alerts ( &alert_log );
|
||||||
|
heterogeneity = _AI_get_alerts_heterogeneity( &single_alerts_count );
|
||||||
|
|
||||||
|
/* Derive the minimum cluster size as a function of the heterogeneity of the alert set */
|
||||||
|
if ( heterogeneity > 0 )
|
||||||
|
cluster_min_size = (int) round ( 1/heterogeneity );
|
||||||
|
else
|
||||||
|
cluster_min_size = 1;
|
||||||
|
|
||||||
/* while ( has_small_clusters && alert_count > cluster_min_size ) */
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
old_alert_count = alert_count;
|
old_alert_count = alert_count;
|
||||||
|
@ -488,9 +555,6 @@ _AI_cluster_thread ( void* arg )
|
||||||
}
|
}
|
||||||
|
|
||||||
alert_count -= _AI_merge_alerts ( &alert_log );
|
alert_count -= _AI_merge_alerts ( &alert_log );
|
||||||
|
|
||||||
/* if ( old_alert_count == alert_count ) */
|
|
||||||
/* break; */
|
|
||||||
} while ( old_alert_count != alert_count );
|
} while ( old_alert_count != alert_count );
|
||||||
|
|
||||||
lock_flag = false;
|
lock_flag = false;
|
||||||
|
@ -664,7 +728,7 @@ _AI_copy_clustered_alerts ( AI_snort_alert *node )
|
||||||
/* Wait until lock_flag is clear, then return the result of _AI_copy_clustered_alerts applied to alert_log. */
/* lock_flag is set to false by _AI_cluster_thread after its merge loop; where it is set is outside this view. */
AI_snort_alert*
|
AI_snort_alert*
|
||||||
AI_get_clustered_alerts ()
|
AI_get_clustered_alerts ()
|
||||||
{
|
{
|
||||||
/* Previous revision: spin on lock_flag with an empty loop body */
while ( lock_flag );
|
/* New revision: same wait, but calls usleep(100) between checks of lock_flag */
for ( ; lock_flag; usleep(100) );
|
||||||
return _AI_copy_clustered_alerts ( alert_log );
|
return _AI_copy_clustered_alerts ( alert_log );
|
||||||
} /* ----- end of function AI_get_clustered_alerts ----- */
|
} /* ----- end of function AI_get_clustered_alerts ----- */
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue