mirror of
https://github.com/BlackLight/Snort_AIPreproc.git
synced 2024-11-24 04:35:11 +01:00
Keeping bayesian correlation in bayesian.c
This commit is contained in:
parent
d7e0b426f4
commit
dd3ea5940d
5 changed files with 16 additions and 140 deletions
|
@ -18,6 +18,7 @@ include/sfPolicyUserData.c
|
||||||
libsf_ai_preproc_la_SOURCES = \
|
libsf_ai_preproc_la_SOURCES = \
|
||||||
alert_history.c \
|
alert_history.c \
|
||||||
alert_parser.c \
|
alert_parser.c \
|
||||||
|
bayesian.c \
|
||||||
cluster.c \
|
cluster.c \
|
||||||
correlation.c \
|
correlation.c \
|
||||||
db.c \
|
db.c \
|
||||||
|
|
|
@ -77,7 +77,7 @@ LTLIBRARIES = $(lib_LTLIBRARIES)
|
||||||
libsf_ai_preproc_la_LIBADD =
|
libsf_ai_preproc_la_LIBADD =
|
||||||
am_libsf_ai_preproc_la_OBJECTS = libsf_ai_preproc_la-alert_history.lo \
|
am_libsf_ai_preproc_la_OBJECTS = libsf_ai_preproc_la-alert_history.lo \
|
||||||
libsf_ai_preproc_la-alert_parser.lo \
|
libsf_ai_preproc_la-alert_parser.lo \
|
||||||
libsf_ai_preproc_la-cluster.lo \
|
libsf_ai_preproc_la-bayesian.lo libsf_ai_preproc_la-cluster.lo \
|
||||||
libsf_ai_preproc_la-correlation.lo libsf_ai_preproc_la-db.lo \
|
libsf_ai_preproc_la-correlation.lo libsf_ai_preproc_la-db.lo \
|
||||||
libsf_ai_preproc_la-mysql.lo libsf_ai_preproc_la-postgresql.lo \
|
libsf_ai_preproc_la-mysql.lo libsf_ai_preproc_la-postgresql.lo \
|
||||||
libsf_ai_preproc_la-regex.lo libsf_ai_preproc_la-spp_ai.lo \
|
libsf_ai_preproc_la-regex.lo libsf_ai_preproc_la-spp_ai.lo \
|
||||||
|
@ -253,6 +253,7 @@ include/sfPolicyUserData.c
|
||||||
libsf_ai_preproc_la_SOURCES = \
|
libsf_ai_preproc_la_SOURCES = \
|
||||||
alert_history.c \
|
alert_history.c \
|
||||||
alert_parser.c \
|
alert_parser.c \
|
||||||
|
bayesian.c \
|
||||||
cluster.c \
|
cluster.c \
|
||||||
correlation.c \
|
correlation.c \
|
||||||
db.c \
|
db.c \
|
||||||
|
@ -379,6 +380,9 @@ libsf_ai_preproc_la-alert_history.lo: alert_history.c
|
||||||
libsf_ai_preproc_la-alert_parser.lo: alert_parser.c
|
libsf_ai_preproc_la-alert_parser.lo: alert_parser.c
|
||||||
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-alert_parser.lo `test -f 'alert_parser.c' || echo '$(srcdir)/'`alert_parser.c
|
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-alert_parser.lo `test -f 'alert_parser.c' || echo '$(srcdir)/'`alert_parser.c
|
||||||
|
|
||||||
|
libsf_ai_preproc_la-bayesian.lo: bayesian.c
|
||||||
|
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-bayesian.lo `test -f 'bayesian.c' || echo '$(srcdir)/'`bayesian.c
|
||||||
|
|
||||||
libsf_ai_preproc_la-cluster.lo: cluster.c
|
libsf_ai_preproc_la-cluster.lo: cluster.c
|
||||||
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-cluster.lo `test -f 'cluster.c' || echo '$(srcdir)/'`cluster.c
|
$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-cluster.lo `test -f 'cluster.c' || echo '$(srcdir)/'`cluster.c
|
||||||
|
|
||||||
|
|
8
TODO
8
TODO
|
@ -2,6 +2,12 @@
|
||||||
AVERAGE/HIGH PRIORITY:
|
AVERAGE/HIGH PRIORITY:
|
||||||
======================
|
======================
|
||||||
|
|
||||||
|
- Clustering alerts with time constraints
|
||||||
|
- Web interface
|
||||||
|
- Code profiling
|
||||||
|
- Saving packet flows as .pcap
|
||||||
|
- Neural network for computing k
|
||||||
|
- Isolating independent subgraphs from hyperalert correlation graphs
|
||||||
- Testing more scenarios, making more hyperalert models
|
- Testing more scenarios, making more hyperalert models
|
||||||
|
|
||||||
=============
|
=============
|
||||||
|
@ -9,6 +15,7 @@ LOW PRIORITY:
|
||||||
=============
|
=============
|
||||||
|
|
||||||
- Managing clusters for addresses, timestamps (and more?)
|
- Managing clusters for addresses, timestamps (and more?)
|
||||||
|
- Splitting the distinct subgraphs of the output graph
|
||||||
- libgc support
|
- libgc support
|
||||||
|
|
||||||
=====
|
=====
|
||||||
|
@ -22,4 +29,5 @@ DONE:
|
||||||
+ Dynamic cluster_min_size algorithm
|
+ Dynamic cluster_min_size algorithm
|
||||||
+ Add alerts' history serialization to db.c as well
|
+ Add alerts' history serialization to db.c as well
|
||||||
+ Bayesian learning among alerts in alert log
|
+ Bayesian learning among alerts in alert log
|
||||||
|
+ Split bayesian correlation out of correlation.c
|
||||||
|
|
||||||
|
|
140
correlation.c
140
correlation.c
|
@ -67,37 +67,9 @@ typedef struct {
|
||||||
} AI_alert_correlation;
|
} AI_alert_correlation;
|
||||||
|
|
||||||
|
|
||||||
/** Key for the bayesian correlation table */
|
|
||||||
typedef struct {
|
|
||||||
/** Snort ID of the first alert */
|
|
||||||
AI_alert_event_key a;
|
|
||||||
|
|
||||||
/** Snort ID of the second alert */
|
|
||||||
AI_alert_event_key b;
|
|
||||||
} AI_bayesian_correlation_key;
|
|
||||||
|
|
||||||
|
|
||||||
/** Bayesian alert correlation hash table */
|
|
||||||
typedef struct {
|
|
||||||
/** Key for the hash table */
|
|
||||||
AI_bayesian_correlation_key key;
|
|
||||||
|
|
||||||
/** Correlation value */
|
|
||||||
double correlation;
|
|
||||||
|
|
||||||
/** Timestamp of the last acquired correlation value */
|
|
||||||
time_t latest_computation_time;
|
|
||||||
|
|
||||||
/** Make the struct 'hashable' */
|
|
||||||
UT_hash_handle hh;
|
|
||||||
} AI_bayesian_correlation;
|
|
||||||
|
|
||||||
|
|
||||||
PRIVATE AI_bayesian_correlation *bayesian_cache = NULL;
|
|
||||||
PRIVATE AI_hyperalert_info *hyperalerts = NULL;
|
PRIVATE AI_hyperalert_info *hyperalerts = NULL;
|
||||||
PRIVATE AI_snort_alert *alerts = NULL;
|
PRIVATE AI_snort_alert *alerts = NULL;
|
||||||
PRIVATE AI_alert_correlation *correlation_table = NULL;
|
PRIVATE AI_alert_correlation *correlation_table = NULL;
|
||||||
PRIVATE double k_exp_value = 0.0;
|
|
||||||
PRIVATE pthread_mutex_t mutex;
|
PRIVATE pthread_mutex_t mutex;
|
||||||
|
|
||||||
|
|
||||||
|
@ -260,116 +232,6 @@ _AI_get_function_arguments ( char *orig_stmt, int *n_args )
|
||||||
return args;
|
return args;
|
||||||
} /* ----- end of function _AI_get_function_arguments ----- */
|
} /* ----- end of function _AI_get_function_arguments ----- */
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Function used for computing the correlation probability A->B of two alerts (A,B) given their timestamps: f(ta, tb) = exp ( -(tb - ta)^2 / k )
|
|
||||||
* \param ta Timestamp of A
|
|
||||||
* \param tb Timestamp of B
|
|
||||||
* \return The correlation probability A->B
|
|
||||||
*/
|
|
||||||
|
|
||||||
PRIVATE double
|
|
||||||
_AI_bayesian_correlation_function ( time_t ta, time_t tb )
|
|
||||||
{
|
|
||||||
if ( k_exp_value == 0.0 )
|
|
||||||
k_exp_value = - (double) (config->bayesianCorrelationInterval * config->bayesianCorrelationInterval) / log ( CUTOFF_Y_VALUE );
|
|
||||||
|
|
||||||
return exp ( -((ta - tb) * (ta - tb)) / k_exp_value );
|
|
||||||
} /* ----- end of function _AI_bayesian_correlation_function ----- */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Compute the correlation between two alerts, A -> B: p[A|B] = p[Corr(A,B)] / P[B]
|
|
||||||
* \param a First alert
|
|
||||||
* \param b Second alert
|
|
||||||
* \return A real coefficient representing p[A|B] using the historical information
|
|
||||||
*/
|
|
||||||
|
|
||||||
PRIVATE double
|
|
||||||
_AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b )
|
|
||||||
{
|
|
||||||
double corr = 0.0;
|
|
||||||
unsigned int corr_count = 0,
|
|
||||||
corr_count_a = 0;
|
|
||||||
|
|
||||||
BOOL is_a_correlated = false;
|
|
||||||
AI_bayesian_correlation_key bayesian_key;
|
|
||||||
AI_bayesian_correlation *found = NULL;
|
|
||||||
|
|
||||||
AI_alert_event_key key_a,
|
|
||||||
key_b;
|
|
||||||
|
|
||||||
AI_alert_event *events_a = NULL,
|
|
||||||
*events_b = NULL;
|
|
||||||
|
|
||||||
AI_alert_event *events_iterator_a,
|
|
||||||
*events_iterator_b;
|
|
||||||
|
|
||||||
if ( !a || !b )
|
|
||||||
return 0.0;
|
|
||||||
|
|
||||||
key_a.gid = a->gid;
|
|
||||||
key_a.sid = a->sid;
|
|
||||||
key_a.rev = a->rev;
|
|
||||||
|
|
||||||
key_b.gid = b->gid;
|
|
||||||
key_b.sid = b->sid;
|
|
||||||
key_b.rev = b->rev;
|
|
||||||
|
|
||||||
/* Check if this correlation value is already in our cache */
|
|
||||||
bayesian_key.a = key_a;
|
|
||||||
bayesian_key.b = key_b;
|
|
||||||
HASH_FIND ( hh, bayesian_cache, &bayesian_key, sizeof ( bayesian_key ), found );
|
|
||||||
|
|
||||||
if ( found )
|
|
||||||
{
|
|
||||||
/* Ok, the abs() is not needed until the time starts running backwards, but it's better going safe... */
|
|
||||||
if ( abs ( time ( NULL ) - found->latest_computation_time ) <= config->bayesianCorrelationCacheValidity )
|
|
||||||
/* If our alert couple is there, just return it */
|
|
||||||
return found->correlation;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( !( events_a = (AI_alert_event*) AI_get_alert_events_by_key ( key_a )) ||
|
|
||||||
!( events_b = (AI_alert_event*) AI_get_alert_events_by_key ( key_b )))
|
|
||||||
return 0.0;
|
|
||||||
|
|
||||||
for ( events_iterator_a = events_a; events_iterator_a; events_iterator_a = events_iterator_a->next )
|
|
||||||
{
|
|
||||||
is_a_correlated = false;
|
|
||||||
|
|
||||||
for ( events_iterator_b = events_b; events_iterator_b; events_iterator_b = events_iterator_b->next )
|
|
||||||
{
|
|
||||||
if ( abs ( events_iterator_a->timestamp - events_iterator_b->timestamp ) <= config->bayesianCorrelationInterval )
|
|
||||||
{
|
|
||||||
is_a_correlated = true;
|
|
||||||
corr_count++;
|
|
||||||
corr += _AI_bayesian_correlation_function ( events_iterator_a->timestamp, events_iterator_b->timestamp );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( is_a_correlated )
|
|
||||||
corr_count_a++;
|
|
||||||
}
|
|
||||||
|
|
||||||
corr /= (double) corr_count;
|
|
||||||
corr -= ( events_a->count - corr_count_a ) / events_a->count;
|
|
||||||
/* _dpd.logMsg ( " Number of '%s' alerts correlated to '%s': %u over %u\\n", a->desc, b->desc, corr_count_a, events_a->count ); */
|
|
||||||
|
|
||||||
if ( found )
|
|
||||||
{
|
|
||||||
found->correlation = corr;
|
|
||||||
found->latest_computation_time = time ( NULL );
|
|
||||||
} else {
|
|
||||||
if ( !( found = ( AI_bayesian_correlation* ) malloc ( sizeof ( AI_bayesian_correlation ))))
|
|
||||||
_dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ );
|
|
||||||
|
|
||||||
found->key = bayesian_key;
|
|
||||||
found->correlation = corr;
|
|
||||||
found->latest_computation_time = time ( NULL );
|
|
||||||
}
|
|
||||||
|
|
||||||
/* _dpd.logMsg ( "Correlation ('%s') -> ('%s'): %f\\n", a->desc, b->desc, corr ); */
|
|
||||||
return corr;
|
|
||||||
} /* ----- end of function _AI_alert_bayesian_correlation ----- */
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Compute the correlation coefficient between two alerts, as #INTERSECTION(pre(B), post(A)) / #UNION(pre(B), post(A)), on the basis of preconditions and postconditions in the knowledge base's correlation rules
|
* \brief Compute the correlation coefficient between two alerts, as #INTERSECTION(pre(B), post(A)) / #UNION(pre(B), post(A)), on the basis of preconditions and postconditions in the knowledge base's correlation rules
|
||||||
|
@ -941,7 +803,7 @@ AI_alert_correlation_thread ( void *arg )
|
||||||
|
|
||||||
corr->key = corr_key;
|
corr->key = corr_key;
|
||||||
kb_correlation = _AI_kb_correlation_coefficient ( corr_key.a, corr_key.b );
|
kb_correlation = _AI_kb_correlation_coefficient ( corr_key.a, corr_key.b );
|
||||||
bayesian_correlation = _AI_alert_bayesian_correlation ( corr_key.a, corr_key.b );
|
bayesian_correlation = AI_alert_bayesian_correlation ( corr_key.a, corr_key.b );
|
||||||
|
|
||||||
if ( bayesian_correlation == 0.0 || config->bayesianCorrelationInterval == 0 )
|
if ( bayesian_correlation == 0.0 || config->bayesianCorrelationInterval == 0 )
|
||||||
corr->correlation = kb_correlation;
|
corr->correlation = kb_correlation;
|
||||||
|
|
1
spp_ai.h
1
spp_ai.h
|
@ -361,6 +361,7 @@ void* AI_alerts_pool_thread ( void *arg );
|
||||||
void* AI_serializer_thread ( void *arg );
|
void* AI_serializer_thread ( void *arg );
|
||||||
const AI_alert_event* AI_get_alert_events_by_key ( AI_alert_event_key );
|
const AI_alert_event* AI_get_alert_events_by_key ( AI_alert_event_key );
|
||||||
unsigned int AI_get_history_alert_number ();
|
unsigned int AI_get_history_alert_number ();
|
||||||
|
double AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b );
|
||||||
|
|
||||||
/** Function pointer to the function used for getting the alert list (from log file, db, ...) */
|
/** Function pointer to the function used for getting the alert list (from log file, db, ...) */
|
||||||
extern AI_snort_alert* (*get_alerts)(void);
|
extern AI_snort_alert* (*get_alerts)(void);
|
||||||
|
|
Loading…
Reference in a new issue