/* Snort_AIPreproc/spp_ai.h */
/*
* =====================================================================================
*
* Filename: spp_ai.h
*
* Description: Header file for the preprocessor
*
* Version: 1.0
* Created: 30/07/2010 15:47:12
* Revision: none
* Compiler: gcc
*
* Author: BlackLight (http://0x00.ath.cx), <blacklight@autistici.org>
* Licence: GNU GPL v.3
* Company: DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
*
* =====================================================================================
*/
#ifndef _SPP_AI_H
#define _SPP_AI_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sf_snort_packet.h"
#include "sf_dynamic_preprocessor.h"
#include "uthash.h"
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <time.h>

/** Shorthand for the `static` storage class, used to mark file-local symbols */
#define PRIVATE static

/** Default interval in seconds for the thread cleaning up TCP streams */
#define DEFAULT_HASH_CLEANUP_INTERVAL 300

/** Default interval in seconds before a stream without any packet is considered timed out */
#define DEFAULT_STREAM_EXPIRE_INTERVAL 300

/** Default interval in seconds for reading alerts from the alert database, if used */
#define DEFAULT_DATABASE_INTERVAL 30

/** Default interval in seconds for the thread clustering alerts */
#define DEFAULT_ALERT_CLUSTERING_INTERVAL 300

/** Default interval in seconds for running the graph correlation thread */
#define DEFAULT_ALERT_CORRELATION_INTERVAL 300

/** Default path to Snort's log file */
#define DEFAULT_ALERT_LOG_FILE "/var/log/snort/alert"

/** Default path to Snort's clustered alerts file */
#define DEFAULT_CLUSTER_LOG_FILE "/var/log/snort/clustered_alerts"

/** Default path to alert correlation rules directory */
#define DEFAULT_CORR_RULES_DIR "/etc/snort/corr_rules"

/** Default directory for placing correlated alerts information (.dot and possibly .png files) */
#define DEFAULT_CORR_ALERTS_DIR "/var/log/snort/correlated_alerts"

/** Default path to alert history binary file, used for bayesian statistical correlation over alerts */
#define DEFAULT_ALERT_HISTORY_FILE "/var/log/snort/alert_history"

/** Default correlation threshold coefficient for correlating two hyperalerts */
#define DEFAULT_CORR_THRESHOLD 0.5

/** Default size of the alerts' buffer to be periodically sent to the serialization thread */
#define DEFAULT_ALERT_BUFSIZE 30

/** Default timeout in seconds between a serialization of the alerts' buffer and the next one */
#define DEFAULT_ALERT_SERIALIZATION_INTERVAL 3600

/** Default interval in seconds between two alerts (a,b) for considering them correlated */
#define DEFAULT_BAYESIAN_CORRELATION_INTERVAL 1200

/** Default interval in seconds between an invocation of the thread for parsing
 * XML manual correlations and the next one */
#define DEFAULT_MANUAL_CORRELATIONS_PARSING_INTERVAL 120

/** Default interval in seconds between a training loop for the neural network for
 * alert correlations and the next one (this value should usually be high) */
#define DEFAULT_NEURAL_NETWORK_TRAINING_INTERVAL 43200

/** Default interval in seconds between an execution of the thread that attempts to cluster
 * the output layer of the neural network searching for alerts belonging to the same
 * attack scenario and the next one */
#define DEFAULT_NEURAL_CLUSTERING_INTERVAL 1200

/** Default interval of validity in seconds for an entry in the cache of correlated alerts */
#define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600

/** Default maximum interval, in seconds, between two alerts for being considered in the same cluster */
#define DEFAULT_CLUSTER_MAX_ALERT_INTERVAL 14400

/** Default number of neurons per side on the output matrix of the SOM neural network */
#define DEFAULT_OUTPUT_NEURONS_PER_SIDE 20

/** Default number of steps used for training the neural network */
#define DEFAULT_NEURAL_TRAIN_STEPS 10

/** Default number of alerts needed in the history file or database for letting a certain
 * heuristic correlation index weight be =~ 0.95 (the weight monotonically increases
 * with the number of alerts according to a hyperbolic tangent function) */
#define DEFAULT_ALERT_CORRELATION_WEIGHT 5000

/** Default web server port */
#define DEFAULT_WEBSERV_PORT 7654

/** Default web server banner */
#define DEFAULT_WEBSERV_BANNER "Snort AIPreprocessor module"

/** Cutoff y value in the exponential decay for considering two alerts not correlated */
#define CUTOFF_Y_VALUE 0.01

/** Approximated solution of the equation tanh(x) = 0.95 (used as parameter in the
 * correlation indexes weight function) */
#define HYPERBOLIC_TANGENT_SOLUTION 1.83178

/****************************/
/* Database support: HAVE_DB is defined (as 1) as soon as at least one of the
 * database client library macros (HAVE_LIBMYSQLCLIENT, HAVE_LIBPQ) is defined */
#if defined(HAVE_LIBMYSQLCLIENT) || defined(HAVE_LIBPQ)
#define HAVE_DB 1
#endif
/****************************/

/** Dynamic preprocessor data object. It is only declared here; its definition lives
 * outside this header (presumably it is the handle through which the module talks to
 * Snort's dynamic preprocessor framework - TODO confirm against the defining source) */
extern DynamicPreprocessorData _dpd;
/* The fixed-width integer types uint8_t, uint16_t and uint32_t are provided by
 * <stdint.h>. They are intentionally not re-declared by hand here: a private
 * typedef conflicts with the standard one on every platform where the
 * underlying types differ (e.g. uint32_t being 'unsigned long'). */

/** Boolean type of the module.
 *
 * When <stdbool.h> has already been included (or the compiler implements C23,
 * where true/false are keywords) the names 'false' and 'true' cannot be used
 * as enumerators, so the enumeration is declared with private enumerator names
 * and the standard true/false (same values, 0 and 1) are used instead.
 * Otherwise 'false' (0) and 'true' (1) are declared here, as before. */
#if defined(__bool_true_false_are_defined) || \
    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
typedef enum { AI_BOOL_FALSE, AI_BOOL_TRUE } BOOL;
#else
typedef enum { false, true } BOOL;
#endif

/*****************************************************************/
/** Possible types of clustering attributes.
 * CLUSTER_TYPES is not an attribute itself: being the last enumerator, its value
 * is the number of enumerators declared before it, and it is used as an array
 * dimension elsewhere in this header */
typedef enum {
    none,
    src_addr,
    dst_addr,
    src_port,
    dst_port,
    CLUSTER_TYPES
} cluster_type;

/*****************************************************************/
/** Each stream in the hash table is identified by the couple (src_ip, dst_port).
 * NOTE(review): the two members have different sizes, so the structure most likely
 * contains padding bytes; if it is hashed/compared as raw memory, instances should be
 * zeroed before being filled - verify against the code using it as a key */
struct pkt_key
{
    /** Source IP address of the stream */
    uint32_t src_ip;
    /** Destination port of the stream */
    uint16_t dst_port;
};

/*****************************************************************/
/** Identifier of a packet in a stream */
2010-08-14 14:30:41 +02:00
struct pkt_info
{
/** Key of the packet (src_ip, dst_port) */
struct pkt_key key;
/** Timestamp */
time_t timestamp;
/** Reference to SFSnortPacket containing packet's information */
SFSnortPacket* pkt;
/** Pointer to the next packet in the stream */
struct pkt_info* next;
/** Flag set if the packet is observed, i.e. associated to a security alert */
BOOL observed;
/** Make the struct 'hashable' */
UT_hash_handle hh;
2010-08-14 14:30:41 +02:00
};
2010-09-11 02:12:39 +02:00
/*****************************************************************/
/** Data type containing the configuration of the module */
typedef struct
{
    /** Interval in seconds for the stream cleanup thread */
    unsigned long hashCleanupInterval;

    /** Interval in seconds for considering an idle stream timed out */
    unsigned long streamExpireInterval;

    /** Interval in seconds for the alert clustering thread */
    unsigned long alertClusteringInterval;

    /** Interval in seconds for reading the alert database, if database logging is used */
    unsigned long databaseParsingInterval;

    /** Interval in seconds for running the thread for building alert correlation graphs */
    unsigned long correlationGraphInterval;

    /** Interval in seconds between a serialization of the alerts' buffer and the next one */
    unsigned long alertSerializationInterval;

    /** Interval in seconds between two alerts (a,b) for considering them correlated */
    unsigned long bayesianCorrelationInterval;

    /** Maximum interval, in seconds, between two alerts for being considered in the same cluster */
    unsigned long clusterMaxAlertInterval;

    /** Interval in seconds between an invocation of the thread for parsing XML manual
     * correlations and the next one */
    unsigned long manualCorrelationsParsingInterval;

    /** Interval in seconds between an execution of the thread that attempts to cluster
     * the output layer of the neural network searching for alerts belonging to the same
     * attack scenario and the next one */
    unsigned long neuralClusteringInterval;

    /** Interval in seconds for which an entry in the cache of correlated alerts is valid */
    unsigned long bayesianCorrelationCacheValidity;

    /** Interval in seconds between a training loop for the neural network for
     * alert correlations and the next one (this value should usually be high) */
    unsigned long neuralNetworkTrainingInterval;

    /** Number of neurons per side on the output matrix of the SOM neural network */
    unsigned long outputNeuronsPerSide;

    /** Number of alerts needed in the history file or database for letting a certain
     * heuristic correlation index weight be =~ 0.95 (the weight monotonically increases
     * with the number of alerts according to a hyperbolic tangent function) */
    unsigned long alert_correlation_weight;

    /** Number of steps used for training the neural network */
    unsigned long neural_train_steps;

    /** Size of the alerts' buffer to be periodically sent to the serialization thread */
    unsigned long alert_bufsize;

    /** Correlation threshold coefficient for correlating two hyperalerts. Two hyperalerts
     * are 'correlated' to each other in a multi-step attack graph if and only if their
     * correlation value is >= m + ks, where m is the average correlation coefficient,
     * s is the standard deviation over this coefficient, and k is this threshold
     * coefficient. Its value can be >= 0. A value in [0,1] is strongly suggested,
     * but this value mostly depends on how accurately the correlation rules were
     * defined. Be careful: with a coefficient > 1 (or >> 1) no correlation
     * may occur at all! */
    double correlationThresholdCoefficient;

    /** Port the web server providing the web interface for the correlation graph
     * will listen on */
    unsigned short webserv_port;

    /** (Absolute) path to the directory containing the HTML files for the web interface */
    char webserv_dir[1024];

    /** Banner string of the web server (this will be placed in the 'Server' HTTP header
     * and in the footer of error pages) */
    char webserv_banner[1024];

    /** Directory containing extra correlation modules */
    char corr_modules_dir[1024];

    /** Alert file */
    char alertfile[1024];

    /** Alert history binary file */
    char alert_history_file[1024];

    /** Clustered alerts file */
    char clusterfile[1024];

    /** Correlation rules path */
    char corr_rules_dir[1024];

    /** Directory where the correlated alerts' information will be placed */
    char corr_alerts_dir[1024];

    /** File keeping the serialized neural network used for the alert correlation */
    char netfile[1024];

    /** File containing the likely clusters computed over the output layer of the neural network */
    char neural_clusters_log[1024];

    /** Database name, if database logging is used */
    char dbname[256];

    /** Database user, if database logging is used */
    char dbuser[256];

    /** Database password, if database logging is used */
    char dbpass[256];

    /** Database host, if database logging is used */
    char dbhost[256];

    /** Output database type, if clustered alerts and
     * correlations are saved to a database as well.
     * OUTDBTYPE_NUM is the number of enumerators preceding it, not a database type */
    enum { outdb_none, outdb_mysql, outdb_postgresql, OUTDBTYPE_NUM } outdbtype;

    /** Output database name, if clustered alerts and
     * correlations are saved to a database as well */
    char outdbname[256];

    /** Output database user, if clustered alerts and
     * correlations are saved to a database as well */
    char outdbuser[256];

    /** Output database password, if clustered alerts and
     * correlations are saved to a database as well */
    char outdbpass[256];

    /** Output database host, if clustered alerts and
     * correlations are saved to a database as well */
    char outdbhost[256];
} AI_config;

/*****************************************************************/
/** Data type for hierarchies used for clustering */
2010-08-14 14:30:41 +02:00
typedef struct _hierarchy_node
{
cluster_type type;
char label[256];
int min_val;
int max_val;
int nchildren;
struct _hierarchy_node *parent;
struct _hierarchy_node **children;
} hierarchy_node;
2010-09-11 02:12:39 +02:00
/*****************************************************************/
/** Key for the hyperalert hash table: the (gid, sid, rev) triple
 * (presumably Snort's generator ID, signature ID and revision) */
typedef struct
{
    /** gid component of the key */
    unsigned int gid;

    /** sid component of the key */
    unsigned int sid;

    /** rev component of the key */
    unsigned int rev;
} AI_hyperalert_key;
/*****************************************************************/
/** Hyperalert hash table */
typedef struct
{
/** Hyperalert key */
AI_hyperalert_key key;
/** Pre-conditions, as array of strings */
char **preconds;
/** Number of pre-conditions */
unsigned int n_preconds;
/** Post-conditions, as array of strings */
char **postconds;
2010-08-14 14:30:41 +02:00
2010-09-11 02:12:39 +02:00
/** Number of post-conditions */
unsigned int n_postconds;
/** Make the struct 'hashable' */
UT_hash_handle hh;
} AI_hyperalert_info;
/*****************************************************************/
/** Data type for Snort alerts */
2010-08-14 14:30:41 +02:00
typedef struct _AI_snort_alert {
/* Identifiers of the alert */
unsigned int gid;
unsigned int sid;
unsigned int rev;
/* Snort priority, description,
* classification and timestamp
* of the alert */
unsigned short priority;
char *desc;
char *classification;
time_t timestamp;
/* IP header information */
uint8_t ip_tos;
uint16_t ip_len;
uint16_t ip_id;
uint8_t ip_ttl;
uint8_t ip_proto;
uint32_t ip_src_addr;
uint32_t ip_dst_addr;
2010-08-14 14:30:41 +02:00
/* TCP header information */
uint16_t tcp_src_port;
uint16_t tcp_dst_port;
uint32_t tcp_seq;
uint32_t tcp_ack;
2010-08-14 14:30:41 +02:00
uint8_t tcp_flags;
uint16_t tcp_window;
uint16_t tcp_len;
2010-08-14 14:30:41 +02:00
2010-09-11 02:12:39 +02:00
/** Reference to the TCP stream
2010-08-14 14:30:41 +02:00
* associated to the alert, if any */
struct pkt_info *stream;
2010-09-11 02:12:39 +02:00
/** Pointer to the next alert in
2010-08-14 14:30:41 +02:00
* the log, if any*/
struct _AI_snort_alert *next;
2010-09-11 02:12:39 +02:00
/** Hierarchies for addresses and ports,
2010-08-14 14:30:41 +02:00
* if the clustering algorithm is used */
2010-08-16 22:09:34 +02:00
hierarchy_node *h_node[CLUSTER_TYPES];
2010-08-14 14:30:41 +02:00
/** If the clustering algorithm is used,
* keep tracked of the pointers to the
* single grouped alerts */
struct _AI_snort_alert **grouped_alerts;
2010-09-11 02:12:39 +02:00
/** If the clustering algorithm is used,
2010-08-14 14:30:41 +02:00
* we also count how many alerts this
* single alert groups */
unsigned int grouped_alerts_count;
2010-09-11 02:12:39 +02:00
/** Hyperalert information, pre-conditions
* and post-conditions*/
AI_hyperalert_info *hyperalert;
/* Parent alerts in the chain, if any */
struct _AI_snort_alert **parent_alerts;
/* Number of parent alerts */
unsigned int n_parent_alerts;
/** Array of directly correlated 'derived'
* alerts from the current one, if any */
struct _AI_snort_alert **derived_alerts;
/** Number of derived alerts */
unsigned int n_derived_alerts;
/** Alert ID on the database, if the alerts
* are stored on a database as well */
unsigned long int alert_id;
2010-08-14 14:30:41 +02:00
} AI_snort_alert;
2010-09-11 02:12:39 +02:00
/*****************************************************************/
2010-09-23 21:57:20 +02:00
/** Key of an AI_alert_event entry: it holds the Snort ID of the alert,
 * expressed as the (gid, sid, rev) triple */
typedef struct {
    /** gid component of the Snort ID */
    int gid;

    /** sid component of the Snort ID */
    int sid;

    /** rev component of the Snort ID */
    int rev;
} AI_alert_event_key;
/*****************************************************************/
/** Historical information about an alert, as saved in alert_history */
typedef struct _AI_alert_event {
    /** Snort ID of the alert, also used as hash key */
    AI_alert_event_key key;

    /** Counter associated to the alert
     * (presumably the number of recorded occurrences - TODO confirm) */
    unsigned int count;

    /** Timestamp of the event */
    time_t timestamp;

    /** Next event in the list, if any */
    struct _AI_alert_event *next;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alert_event;
/*****************************************************************/
/** Plain container for a couple of alerts to be merged; it is the argument
 * handed over to the outdb thread */
typedef struct {
    /** First alert of the couple */
    AI_snort_alert *alert1;

    /** Second alert of the couple */
    AI_snort_alert *alert2;
} AI_alerts_couple;
/*****************************************************************/
/** Key for the correlation hash table: an ordered couple of alert pointers */
typedef struct {
    /** First alert of the couple */
    AI_snort_alert *a;

    /** Second alert of the couple */
    AI_snort_alert *b;
} AI_alert_correlation_key;
/*****************************************************************/
/** Entry of the correlation hash table: the correlation value computed
 * for one couple of alerts */
typedef struct {
    /** Hash key (the couple of alerts) */
    AI_alert_correlation_key key;

    /** Correlation coefficient between the two alerts of the key */
    double correlation;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alert_correlation;
/*****************************************************************/
/** Expresses an alert as a numerical tuple manageable by a neural network */
typedef struct {
    /* Identifiers of the alert */
    unsigned int gid;
    unsigned int sid;
    unsigned int rev;

    /* Source and destination IP addresses */
    uint32_t src_ip_addr;
    uint32_t dst_ip_addr;

    /* Source and destination ports */
    uint16_t src_port;
    uint16_t dst_port;

    /* Timestamp of the alert */
    time_t timestamp;

    /* Description string of the alert (ownership of the pointed
     * memory is not stated here - check the code filling the tuple) */
    char* desc;
} AI_som_alert_tuple;
/*****************************************************************/
/** Key for the AI_alerts_per_neuron hash table: a couple of integer coordinates */
typedef struct {
    /** x coordinate */
    int x;

    /** y coordinate */
    int y;
} AI_alerts_per_neuron_key;
/*****************************************************************/
/** For each point of the output layer, holds the list of the alerts associated
 * to it, so that the clustering algorithm can be performed easily */
typedef struct {
    /** Hash key: coordinates of the point on the output layer */
    AI_alerts_per_neuron_key key;

    /** Alerts associated to the point */
    AI_som_alert_tuple *alerts;

    /** Number of alerts (presumably the length of 'alerts' - TODO confirm) */
    int n_alerts;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alerts_per_neuron;
/*****************************************************************/
/** Enumeration for describing the table in the output database.
 * N_TABLES is the number of tables, not a table itself */
enum {
    ALERTS_TABLE,
    IPV4_HEADERS_TABLE,
    TCP_HEADERS_TABLE,
    PACKET_STREAMS_TABLE,
    CLUSTERED_ALERTS_TABLE,
    CORRELATED_ALERTS_TABLE,
    N_TABLES
};

/** Tables in the output database, indexed by the enumeration above.
 * The array is explicitly sized with N_TABLES, so that it always has one
 * slot per table declared in the enumeration. */
static const char *outdb_config[N_TABLES] __attribute__ (( unused )) = {
    "ca_alerts", "ca_ipv4_headers", "ca_tcp_headers",
    "ca_packet_streams", "ca_clustered_alerts", "ca_correlated_alerts"
};
/*
 * The unused attribute is needed for gcc to avoid raising a warning
 * of "unused variable" when compiling with -Wall -pedantic -pedantic-errors,
 * since this array is declared here but only used in two source files
 */

/*****************************************************************/
2010-08-14 14:30:41 +02:00
int preg_match ( const char*, char*, char***, int* );
2010-09-21 16:27:46 +02:00
char* str_replace ( char*, char*, char *);
char* str_replace_all ( char*, char*, char* );
void base64_encode ( const char*, size_t, char** );
void base64_decode ( const char*, char** );
2010-08-14 14:30:41 +02:00
2010-10-03 04:18:43 +02:00
void AI_fatal_err ( const char *msg, const char *file, const int line );
2010-08-14 14:30:41 +02:00
void* AI_hashcleanup_thread ( void* );
void* AI_file_alertparser_thread ( void* );
2010-09-11 02:12:39 +02:00
void* AI_alert_correlation_thread ( void* );
2010-10-07 12:19:21 +02:00
void* AI_webserv_thread ( void* );
2010-09-16 17:11:46 +02:00
#ifdef HAVE_DB
AI_snort_alert* AI_db_get_alerts ( void );
2010-09-21 16:27:46 +02:00
void AI_db_free_alerts ( AI_snort_alert* );
2010-09-11 02:12:39 +02:00
void* AI_db_alertparser_thread ( void* );
#endif
2010-08-14 14:30:41 +02:00
void AI_pkt_enqueue ( SFSnortPacket* );
void AI_set_stream_observed ( struct pkt_key key );
void AI_hierarchies_build ( hierarchy_node**, int );
2010-09-11 02:12:39 +02:00
void AI_free_alerts ( AI_snort_alert *node );
2010-10-26 21:58:34 +02:00
void AI_init_corr_modules ();
2010-08-16 22:09:34 +02:00
2010-08-14 14:30:41 +02:00
struct pkt_info* AI_get_stream_by_key ( struct pkt_key );
AI_snort_alert* AI_get_alerts ( void );
2010-09-11 02:12:39 +02:00
AI_snort_alert* AI_get_clustered_alerts ( void );
2010-09-23 21:57:20 +02:00
void AI_serialize_alerts ( AI_snort_alert**, unsigned int );
void* AI_deserialize_alerts ();
2010-10-21 17:36:47 +02:00
void* AI_alerts_pool_thread ( void* );
void* AI_serializer_thread ( void* );
void* AI_neural_thread ( void* );
2010-09-23 21:57:20 +02:00
const AI_alert_event* AI_get_alert_events_by_key ( AI_alert_event_key );
unsigned int AI_get_history_alert_number ();
double AI_alert_bayesian_correlation ( const AI_snort_alert*, const AI_snort_alert* );
double AI_alert_neural_som_correlation ( const AI_snort_alert*, const AI_snort_alert* );
double AI_neural_correlation_weight ();
double AI_bayesian_correlation_weight ();
2010-09-20 14:39:08 +02:00
void AI_outdb_mutex_initialize ();
void* AI_store_alert_to_db_thread ( void* );
void* AI_store_cluster_to_db_thread ( void* );
void* AI_store_correlation_to_db_thread ( void* );
2010-11-20 16:47:57 +01:00
void* AI_neural_clustering_thread ( void* );
AI_alerts_per_neuron* AI_get_alerts_per_neuron ();
2010-10-26 21:58:34 +02:00
double(**AI_get_corr_functions ( size_t* ))(const AI_snort_alert*, const AI_snort_alert*);
double(**AI_get_corr_weights ( size_t* ))();
/** Function pointer to the function used for getting the alert list (from log file, db, ...) */
extern AI_snort_alert* (*get_alerts)(void);

/** Buffer containing the alerts to be serialized on the binary history file */
extern AI_snort_alert **alerts_pool;

/** Number of alerts contained in the buffer to be serialized */
extern unsigned int alerts_pool_count;

/** Mutex variable for writing on the output database */
extern pthread_mutex_t outdb_mutex;

/** Configuration of the module */
extern AI_config *config;

#endif /* _SPP_AI_H */