From dd3ea5940d47c58d95d25cccb0755c1da611a1c5 Mon Sep 17 00:00:00 2001 From: BlackLight Date: Tue, 28 Sep 2010 21:36:58 +0200 Subject: [PATCH] Keeping bayesian correlation in bayesian.c --- Makefile.am | 1 + Makefile.in | 6 ++- TODO | 8 +++ correlation.c | 140 +------------------------------------------------- spp_ai.h | 1 + 5 files changed, 16 insertions(+), 140 deletions(-) diff --git a/Makefile.am b/Makefile.am index 80f7725..c778720 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,7 @@ include/sfPolicyUserData.c libsf_ai_preproc_la_SOURCES = \ alert_history.c \ alert_parser.c \ +bayesian.c \ cluster.c \ correlation.c \ db.c \ diff --git a/Makefile.in b/Makefile.in index 2641269..fc0da7e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -77,7 +77,7 @@ LTLIBRARIES = $(lib_LTLIBRARIES) libsf_ai_preproc_la_LIBADD = am_libsf_ai_preproc_la_OBJECTS = libsf_ai_preproc_la-alert_history.lo \ libsf_ai_preproc_la-alert_parser.lo \ - libsf_ai_preproc_la-cluster.lo \ + libsf_ai_preproc_la-bayesian.lo libsf_ai_preproc_la-cluster.lo \ libsf_ai_preproc_la-correlation.lo libsf_ai_preproc_la-db.lo \ libsf_ai_preproc_la-mysql.lo libsf_ai_preproc_la-postgresql.lo \ libsf_ai_preproc_la-regex.lo libsf_ai_preproc_la-spp_ai.lo \ @@ -253,6 +253,7 @@ include/sfPolicyUserData.c libsf_ai_preproc_la_SOURCES = \ alert_history.c \ alert_parser.c \ +bayesian.c \ cluster.c \ correlation.c \ db.c \ @@ -379,6 +380,9 @@ libsf_ai_preproc_la-alert_history.lo: alert_history.c libsf_ai_preproc_la-alert_parser.lo: alert_parser.c $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-alert_parser.lo `test -f 'alert_parser.c' || echo '$(srcdir)/'`alert_parser.c +libsf_ai_preproc_la-bayesian.lo: bayesian.c + $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-bayesian.lo `test -f 'bayesian.c' || echo '$(srcdir)/'`bayesian.c + libsf_ai_preproc_la-cluster.lo: cluster.c $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-cluster.lo `test -f 'cluster.c' || echo '$(srcdir)/'`cluster.c diff --git a/TODO b/TODO index b8316ce..24f146f 100644 --- a/TODO +++ b/TODO @@ -2,6 +2,12 @@ AVERAGE/HIGH PRIORITY: ====================== +- Clustering alerts with time constraints +- Web interface +- Code profiling +- Saving packet flows as .pcap +- Neural network for computing k +- Isolating independant subgraphs from hyperalert correlation graphs - Testing more scenarios, making more hyperalert models ============= @@ -9,6 +15,7 @@ LOW PRIORITY: ============= - Managing clusters for addresses, timestamps (and more?) +- Splitting the distinct subgraphs of the output graph - libgc support ===== @@ -22,4 +29,5 @@ DONE: + Dynamic cluster_min_size algorithm + Add alerts' history serialization to db.c as well + Bayesian learning among alerts in alert log ++ Split bayesian correlation out of correlation.c diff --git a/correlation.c b/correlation.c index a0bd006..d2b4c04 100644 --- a/correlation.c +++ b/correlation.c @@ -67,37 +67,9 @@ typedef struct { } AI_alert_correlation; -/** Key for the bayesian correlation table */ -typedef struct { - /** Snort ID of the first alert */ - AI_alert_event_key a; - - /** Snort ID of the second alert */ - AI_alert_event_key b; -} AI_bayesian_correlation_key; - - -/** Bayesian alert correlation hash table */ -typedef struct { - /** Key for the hash table */ - AI_bayesian_correlation_key key; - - /** Correlation value */ - double correlation; - - /** Timestamp of the last acquired correlation value */ - time_t latest_computation_time; - - /** Make the struct 'hashable' */ - UT_hash_handle hh; -} AI_bayesian_correlation; - - -PRIVATE AI_bayesian_correlation *bayesian_cache = NULL; PRIVATE AI_hyperalert_info *hyperalerts = NULL; PRIVATE AI_snort_alert *alerts = NULL; PRIVATE AI_alert_correlation *correlation_table = NULL; -PRIVATE double k_exp_value = 0.0; PRIVATE pthread_mutex_t mutex; @@ -260,116 +232,6 @@ _AI_get_function_arguments ( char *orig_stmt, int *n_args ) return args; } /* ----- end of function _AI_get_function_arguments ----- */ -/** - * \brief Function used for computing the correlation probability A->B of two alerts (A,B) given their timestamps: f(ta, tb) = exp ( -(tb - ta)^2 / k ) - * \param ta Timestamp of A - * \param tb Timestamp of B - * \return The correlation probability A->B - */ - -PRIVATE double -_AI_bayesian_correlation_function ( time_t ta, time_t tb ) -{ - if ( k_exp_value == 0.0 ) - k_exp_value = - (double) (config->bayesianCorrelationInterval * config->bayesianCorrelationInterval) / log ( CUTOFF_Y_VALUE ); - - return exp ( -((ta - tb) * (ta - tb)) / k_exp_value ); -} /* ----- end of function _AI_bayesian_correlation_function ----- */ - -/** - * \brief Compute the correlation between two alerts, A -> B: p[A|B] = p[Corr(A,B)] / P[B] - * \param a First alert - * \param b Second alert - * \return A real coefficient representing p[A|B] using the historical information - */ - -PRIVATE double -_AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b ) -{ - double corr = 0.0; - unsigned int corr_count = 0, - corr_count_a = 0; - - BOOL is_a_correlated = false; - AI_bayesian_correlation_key bayesian_key; - AI_bayesian_correlation *found = NULL; - - AI_alert_event_key key_a, - key_b; - - AI_alert_event *events_a = NULL, - *events_b = NULL; - - AI_alert_event *events_iterator_a, - *events_iterator_b; - - if ( !a || !b ) - return 0.0; - - key_a.gid = a->gid; - key_a.sid = a->sid; - key_a.rev = a->rev; - - key_b.gid = b->gid; - key_b.sid = b->sid; - key_b.rev = b->rev; - - /* Check if this correlation value is already in our cache */ - bayesian_key.a = key_a; - bayesian_key.b = key_b; - HASH_FIND ( hh, bayesian_cache, &bayesian_key, sizeof ( bayesian_key ), found ); - - if ( found ) - { - /* Ok, the abs() is not needed until the time starts running backwards, but it's better going safe... */ - if ( abs ( time ( NULL ) - found->latest_computation_time ) <= config->bayesianCorrelationCacheValidity ) - /* If our alert couple is there, just return it */ - return found->correlation; - } - - if ( !( events_a = (AI_alert_event*) AI_get_alert_events_by_key ( key_a )) || - !( events_b = (AI_alert_event*) AI_get_alert_events_by_key ( key_b ))) - return 0.0; - - for ( events_iterator_a = events_a; events_iterator_a; events_iterator_a = events_iterator_a->next ) - { - is_a_correlated = false; - - for ( events_iterator_b = events_b; events_iterator_b; events_iterator_b = events_iterator_b->next ) - { - if ( abs ( events_iterator_a->timestamp - events_iterator_b->timestamp ) <= config->bayesianCorrelationInterval ) - { - is_a_correlated = true; - corr_count++; - corr += _AI_bayesian_correlation_function ( events_iterator_a->timestamp, events_iterator_b->timestamp ); - } - } - - if ( is_a_correlated ) - corr_count_a++; - } - - corr /= (double) corr_count; - corr -= ( events_a->count - corr_count_a ) / events_a->count; - /* _dpd.logMsg ( " Number of '%s' alerts correlated to '%s': %u over %u\\n", a->desc, b->desc, corr_count_a, events_a->count ); */ - - if ( found ) - { - found->correlation = corr; - found->latest_computation_time = time ( NULL ); - } else { - if ( !( found = ( AI_bayesian_correlation* ) malloc ( sizeof ( AI_bayesian_correlation )))) - _dpd.fatalMsg ( "AIPreproc: Fatal dynamic memory allocation error at %s:%d\n", __FILE__, __LINE__ ); - - found->key = bayesian_key; - found->correlation = corr; - found->latest_computation_time = time ( NULL ); - } - - /* _dpd.logMsg ( "Correlation ('%s') -> ('%s'): %f\\n", a->desc, b->desc, corr ); */ - return corr; -} /* ----- end of function _AI_alert_bayesian_correlation ----- */ - /** * \brief Compute the correlation coefficient between two alerts, as #INTERSECTION(pre(B), post(A)) / #UNION(pre(B), post(A)), on the basis of preconditions and postconditions in the knowledge base's correlation rules @@ -941,7 +803,7 @@ AI_alert_correlation_thread ( void *arg ) corr->key = corr_key; kb_correlation = _AI_kb_correlation_coefficient ( corr_key.a, corr_key.b ); - bayesian_correlation = _AI_alert_bayesian_correlation ( corr_key.a, corr_key.b ); + bayesian_correlation = AI_alert_bayesian_correlation ( corr_key.a, corr_key.b ); if ( bayesian_correlation == 0.0 || config->bayesianCorrelationInterval == 0 ) corr->correlation = kb_correlation; diff --git a/spp_ai.h b/spp_ai.h index 99ddc26..26ce59e 100644 --- a/spp_ai.h +++ b/spp_ai.h @@ -361,6 +361,7 @@ void* AI_alerts_pool_thread ( void *arg ); void* AI_serializer_thread ( void *arg ); const AI_alert_event* AI_get_alert_events_by_key ( AI_alert_event_key ); unsigned int AI_get_history_alert_number (); +double AI_alert_bayesian_correlation ( AI_snort_alert *a, AI_snort_alert *b ); /** Function pointer to the function used for getting the alert list (from log file, db, ...) */ extern AI_snort_alert* (*get_alerts)(void);