Circular buffer for pkt history & more improvements

This commit is contained in:
BlackLight 2010-11-23 23:23:29 +01:00
parent dbba991c47
commit bd19aa77c7
8 changed files with 162 additions and 52 deletions

16
README
View file

@ -188,12 +188,14 @@ preprocessor ai: \
database_parsing_interval 30 \
hashtable_cleanup_interval 300 \
manual_correlations_parsing_interval 120 \
max_hash_pkt_number 1000 \
neural_clustering_interval 1200 \
neural_network_training_interval 43200 \
neural_train_steps 10 \
output_database ( type="dbtype", name="snort", user="snortusr", password="snortpass", host="dbhost" ) \
output_neurons_per_side 20 \
tcp_stream_expire_interval 300 \
use_stream_hash_table 1 \
webserv_banner "Snort AIPreprocessor module" \
webserv_dir "/prefix/share/htdocs" \
webserv_port 7654
@ -323,7 +325,13 @@ the alerts from database and the next one (default if not specified: 30 seconds)
- hashtable_cleanup_interval: The interval that should occur from the cleanup of
the hashtable of TCP streams and the next one (default if not specified: 300
seconds)
seconds). Set this option to 0 for performing no cleanup on the stream hash
table
- max_hash_pkt_number: Maximum number of packets that each element of the stream
hash table should hold, set it to 0 for no limit (default value if not
specified: 1000)
- manual_correlations_parsing_interval: Interval in seconds between an execution
@ -370,6 +378,12 @@ stream as "expired", if no more packets are received inside of that and it's not
"marked" as suspicious (default if not specified: 300 seconds)
- use_stream_hash_table: Set this option to 0 if you do not want to use the hash
table for storing the streams of packets associated to alerts, this is a good
choice on a system where many alerts are triggered (default value if not
specified: 1)
- webserv_banner: Banner of the web server, to be placed on the error pages and
in the "Server" HTTP reply header

View file

@ -80,8 +80,6 @@ AI_serializer ( AI_snort_alert *alert )
void*
AI_alerts_pool_thread ( void *arg )
{
pthread_t serializer_thread;
while ( 1 )
{
sleep ( config->alertSerializationInterval );
@ -138,8 +136,6 @@ AI_file_alertparser_thread ( void* arg )
BOOL in_alert = false;
pthread_t alerts_pool_thread;
pthread_t serializer_thread;
pthread_t db_thread;
/* Initialize the mutex lock, so nobody can read the alerts while we write there */
pthread_mutex_init ( &alert_mutex, NULL );

View file

@ -262,7 +262,6 @@ __AI_merge_alerts ( AI_snort_alert **log )
{
AI_snort_alert *tmp, *tmp2, *tmp3;
AI_alerts_couple *alerts_couple;
pthread_t db_thread;
int count = 0;
for ( tmp = *log; tmp; tmp = tmp->next )

View file

@ -1238,8 +1238,7 @@ AI_alert_correlation_thread ( void *arg )
AI_snort_alert *alert_iterator = NULL,
*alert_iterator2 = NULL;
pthread_t db_thread,
manual_corr_thread;
pthread_t manual_corr_thread;
#ifdef HAVE_LIBGVC
char corr_png_file[4096] = { 0 };

1
db.c
View file

@ -45,7 +45,6 @@ AI_db_alertparser_thread ( void *arg )
int latest_cid = 0;
time_t latest_time = time ( NULL );
pthread_t alerts_pool_thread;
pthread_t serializer_thread;
DB_result res, res2;
DB_row row, row2;

View file

@ -218,11 +218,13 @@ static AI_config * AI_parse(char *args)
correlation_graph_interval = 0,
database_parsing_interval = 0,
manual_correlations_parsing_interval = 0,
max_hash_pkt_number = 0,
neural_clustering_interval = 0,
neural_network_training_interval = 0,
neural_train_steps = 0,
output_neurons_per_side = 0,
stream_expire_interval = 0,
use_stream_hash_table = 0,
webserv_banner_len = 0,
webserv_dir_len = 0;
@ -591,6 +593,48 @@ static AI_config * AI_parse(char *args)
config->neural_train_steps = neural_train_steps;
_dpd.logMsg( " Neural train steps: %u\n", config->neural_train_steps );
/* Parsing the max_hash_pkt_number option */
if (( arg = (char*) strcasestr( args, "max_hash_pkt_number" ) ))
{
for ( arg += strlen("max_hash_pkt_number");
*arg && (*arg < '0' || *arg > '9');
arg++ );
if ( !(*arg) )
{
AI_fatal_err ( "max_hash_pkt_number option used but "
"no value specified", __FILE__, __LINE__ );
}
max_hash_pkt_number = strtoul ( arg, NULL, 10 );
} else {
max_hash_pkt_number = DEFAULT_MAX_HASH_PKT_NUMBER;
}
config->max_hash_pkt_number = max_hash_pkt_number;
_dpd.logMsg( " Maximum number of packets stored in the hash table: %u\n", config->max_hash_pkt_number );
/* Parsing the use_stream_hash_table option */
if (( arg = (char*) strcasestr( args, "use_stream_hash_table" ) ))
{
for ( arg += strlen("use_stream_hash_table");
*arg && (*arg < '0' || *arg > '9');
arg++ );
if ( !(*arg) )
{
AI_fatal_err ( "use_stream_hash_table option used but "
"no value specified", __FILE__, __LINE__ );
}
use_stream_hash_table = strtoul ( arg, NULL, 10 );
} else {
use_stream_hash_table = DEFAULT_USE_STREAM_HASH_TABLE;
}
config->use_stream_hash_table = use_stream_hash_table;
_dpd.logMsg( " Using the stream hash table: %u\n", config->use_stream_hash_table );
/* Parsing the alert_correlation_weight option */
if (( arg = (char*) strcasestr( args, "alert_correlation_weight" ) ))
{

View file

@ -98,11 +98,18 @@
/** Default number of steps used for training the neural network */
#define DEFAULT_NEURAL_TRAIN_STEPS 10
/** Default maximum number of packets that an observed stream in the hash table should hold */
#define DEFAULT_MAX_HASH_PKT_NUMBER 1000
/** Default number of alerts needed in the history file or database for letting a certain
* heuristic correlation index weight be =~ 0.95 (the weight monotonically increases
* with the number of alerts according to a hyperbolic tangent function) */
#define DEFAULT_ALERT_CORRELATION_WEIGHT 5000
/** Default setting for the use of the hash table for holding streams of packets
* associated to a certain alert (0 = do not use, 1 or any value != 0: use) */
#define DEFAULT_USE_STREAM_HASH_TABLE 1
/** Default web server port */
#define DEFAULT_WEBSERV_PORT 7654
@ -164,6 +171,9 @@ struct pkt_info
/** Flag set if the packet is observed, i.e. associated to a security alert */
BOOL observed;
/** Number of packets in the current flow, if available */
unsigned int n_packets;
/** Make the struct 'hashable' */
UT_hash_handle hh;
};
@ -218,12 +228,19 @@ typedef struct
* with the number of alerts according to a hyperbolic tangent function) */
unsigned long alert_correlation_weight;
/** Maximum number of packets that an observed stream in the hash table should hold */
unsigned long max_hash_pkt_number;
/** Number of steps used for training the neural network */
unsigned long neural_train_steps;
/** Size of the alerts' buffer to be periodically sent to the serialization thread */
unsigned long alert_bufsize;
/** Setting for the use of the hash table for holding streams of packets
* associated to a certain alert (0 = do not use, 1 or any value != 0: use) */
unsigned long use_stream_hash_table;
/** Correlation threshold coefficient for correlating two hyperalerts. Two hyperalerts
* are 'correlated' to each other in a multi-step attack graph if and only if their
* correlation value is >= m + ks, where m is the average correlation coefficient,

128
stream.c
View file

@ -98,8 +98,16 @@ __AI_stream_free ( struct pkt_info* stream )
void*
AI_hashcleanup_thread ( void* arg )
{
struct pkt_info *h, *stream;
time_t max_timestamp;
struct pkt_info *h, *stream, *tmp;
time_t max_timestamp;
int pkt_count, pkt_rm;
BOOL has_old_streams;
if ( config->hashCleanupInterval == 0 )
{
pthread_exit ((void*) 0);
return (void*) 0;
}
while ( 1 ) {
/* Sleep for the specified number of seconds */
@ -109,31 +117,69 @@ AI_hashcleanup_thread ( void* arg )
if ( !hash || !HASH_COUNT(hash) )
continue;
/* Check all the streams in the hash */
for ( h = hash; h; h = (struct pkt_info*) h->next ) {
if ( h->observed ) continue;
max_timestamp = 0;
has_old_streams = true;
/* Find the maximum timestamp in the flow */
for ( stream = h; stream; stream = (struct pkt_info*) stream->next ) {
if ( stream->timestamp > max_timestamp )
max_timestamp = stream->timestamp;
}
while ( has_old_streams )
{
has_old_streams = false;
/* If the most recent packet in the stream is older than the specified threshold, remove that stream */
if ( time(NULL) - max_timestamp > config->streamExpireInterval ) {
stream = h;
if ( stream )
/* Check all the streams in the hash */
for ( h = hash; h; h = (struct pkt_info*) h->next ) {
if ( h->observed )
{
/* XXX This, sometimes, randomly, leads to the crash of the module.
* WHY??? Why can't computer science be deterministic, and if a
* certain not-NULL stream exists in a not-NULL hash table why
* should there be a crash, one day yes and the next one no? Until
* I won't find an answer to these enigmatic questions, I will leave
* this code commented, so if a certain stream goes timeout it won't
* be removed. I'm sorry but it's not my fault. Ask the karma about this */
/* __AI_stream_free ( stream ); */
continue;
}
if ( h->next )
{
if ( h->next->observed )
{
if ( config->max_hash_pkt_number != 0 )
{
if ( h->next->n_packets == 0 )
{
for ( stream = h->next, pkt_count=0; stream; stream = (struct pkt_info*) stream->next, pkt_count++ );
h->next->n_packets = pkt_count;
} else {
pkt_count = h->next->n_packets;
}
/* If this stream has too many packets inside, remove the oldest ones */
if ( pkt_count > config->max_hash_pkt_number )
{
for ( stream = h->next, pkt_rm = 0;
stream && pkt_rm < pkt_count - config->max_hash_pkt_number;
stream = stream->next, pkt_rm++ )
{
tmp = stream->next;
__AI_stream_free ( stream );
stream = tmp;
}
h->next = stream;
}
}
}
}
max_timestamp = 0;
/* Find the maximum timestamp in the flow */
for ( stream = h; stream; stream = (struct pkt_info*) stream->next ) {
if ( stream->timestamp > max_timestamp )
max_timestamp = stream->timestamp;
}
/* If the most recent packet in the stream is older than the specified threshold, remove that stream */
if ( time (NULL) - max_timestamp > config->streamExpireInterval ) {
has_old_streams = true;
stream = h;
if ( stream )
{
__AI_stream_free ( stream );
}
}
}
}
@ -163,12 +209,18 @@ AI_pkt_enqueue ( SFSnortPacket* pkt )
if ( start_time == 0 )
start_time = time (NULL);
/* If we are not using the stream hash table, just return */
if ( config->use_stream_hash_table == 0 )
return;
/* If this is not an IP and/or TCP packet, it's not for me */
if ( !( pkt->ip4_header && pkt->tcp_header ))
return;
if ( !( info = (struct pkt_info*) malloc( sizeof(struct pkt_info) )) )
{
AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
}
memset ( &key, 0, sizeof(struct pkt_key));
key.src_ip = pkt->ip4_header->source.s_addr;
@ -177,10 +229,13 @@ AI_pkt_enqueue ( SFSnortPacket* pkt )
info->key = key;
info->timestamp = time(NULL);
info->observed = false;
info->n_packets = 0;
info->next = NULL;
if ( !( info->pkt = (SFSnortPacket*) malloc ( sizeof (SFSnortPacket) )) )
{
AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
}
memcpy ( info->pkt, pkt, sizeof (SFSnortPacket) );
@ -193,8 +248,12 @@ AI_pkt_enqueue ( SFSnortPacket* pkt )
/* If there is already an element of this traffic stream in my hash table,
* append the packet just received to this stream*/
if ( found ) {
/* If the current packet contains a RST, just deallocate the stream */
if ( info->pkt->tcp_header->flags & TCPHEADER_RST ) {
/* If the current packet contains a RST or a FIN, just deallocate the stream */
if (
( info->pkt->tcp_header->flags & TCPHEADER_RST ) ||
(( info->pkt->tcp_header->flags & TCPHEADER_FIN ) &&
( info->pkt->tcp_header->flags & TCPHEADER_ACK ))
) {
pthread_mutex_lock ( &hash_mutex );
HASH_FIND ( hh, hash, &key, sizeof(struct pkt_key), found );
pthread_mutex_unlock ( &hash_mutex );
@ -227,23 +286,6 @@ AI_pkt_enqueue ( SFSnortPacket* pkt )
if ( !tmp ) {
found->next = info;
}
/* If the current packet contains an ACK and the latest one
* in this stream contained a FIN, then the communication
* on this stream is over */
if ( found->pkt->tcp_header->flags & TCPHEADER_FIN ) {
if ( info->pkt->tcp_header->flags & TCPHEADER_ACK ) {
pthread_mutex_unlock ( &hash_mutex );
HASH_FIND ( hh, hash, &key, sizeof(struct pkt_key), found );
pthread_mutex_unlock ( &hash_mutex );
if ( found ) {
if ( !found->observed ) {
__AI_stream_free ( found );
}
}
}
}
}
} else {
/* If the packet contains the ACK flag, no payload and it is