Snort_AIPreproc/spp_ai.h
2011-02-04 00:43:59 +01:00

668 lines
23 KiB
C

/*
* =====================================================================================
*
* Filename: spp_ai.h
*
* Description: Header file for the preprocessor
*
* Version: 1.0
* Created: 30/07/2010 15:47:12
* Revision: none
* Compiler: gcc
*
* Author: BlackLight (http://0x00.ath.cx), <blacklight@autistici.org>
* Licence: GNU GPL v.3
* Company: DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
*
* =====================================================================================
*/
#ifndef _SPP_AI_H
#define _SPP_AI_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "sf_snort_packet.h"
#include "sf_dynamic_preprocessor.h"
#include "uthash.h"
#include <netinet/in.h>
#include <pthread.h>
/*******************************************/
#ifdef HAVE_LIBPYTHON2_6
/* Avoid conflicts with Snort header files: drop the feature-test macros that
 * may already be defined at this point, right before Python.h is included.
 * NOTE(review): _XOPEN_C_SOURCE does not look like a standard feature-test
 * macro (the usual ones are _POSIX_C_SOURCE and _XOPEN_SOURCE), so that
 * #undef is probably a no-op - confirm before removing it.
 * NOTE(review): the Python/C API documentation asks for Python.h to be
 * included before any standard header; here it follows <netinet/in.h> and
 * <pthread.h> - confirm that this ordering is intentional. */
#ifdef _POSIX_C_SOURCE
#undef _POSIX_C_SOURCE
#endif
#ifdef _XOPEN_C_SOURCE
#undef _XOPEN_C_SOURCE
#endif
#ifdef _XOPEN_SOURCE
#undef _XOPEN_SOURCE
#endif
#include <Python.h>
#endif
/*******************************************/
/** Storage-class shorthand: expands to `static`, i.e. gives the symbol internal linkage */
#define PRIVATE static
/** Default interval in seconds for the thread cleaning up TCP streams */
#define DEFAULT_HASH_CLEANUP_INTERVAL 300
/** Default interval in seconds before a stream without any packet is considered timed out */
#define DEFAULT_STREAM_EXPIRE_INTERVAL 300
/** Default interval in seconds for reading alerts from the alert database, if used */
#define DEFAULT_DATABASE_INTERVAL 30
/** Default interval in seconds for the thread clustering alerts */
#define DEFAULT_ALERT_CLUSTERING_INTERVAL 300
/** Default interval in seconds for running the graph correlation thread */
#define DEFAULT_ALERT_CORRELATION_INTERVAL 300
/** Default path to Snort's log file */
#define DEFAULT_ALERT_LOG_FILE "/var/log/snort/alert"
/** Default path to Snort's clustered alerts file */
#define DEFAULT_CLUSTER_LOG_FILE "/var/log/snort/clustered_alerts"
/** Default path to alert correlation rules directory */
#define DEFAULT_CORR_RULES_DIR "/etc/snort/corr_rules"
/** Default directory for placing correlated alerts information (.dot and possibly .png files) */
#define DEFAULT_CORR_ALERTS_DIR "/var/log/snort/correlated_alerts"
/** Default path to alert history binary file, used for bayesian statistical correlation over alerts */
#define DEFAULT_ALERT_HISTORY_FILE "/var/log/snort/alert_history"
/** Default correlation threshold coefficient for correlating two hyperalerts */
#define DEFAULT_CORR_THRESHOLD 0.5
/** Default size of the alerts' buffer to be periodically sent to the serialization thread */
#define DEFAULT_ALERT_BUFSIZE 30
/** Default timeout in seconds (3600 s = 1 hour) between a serialization of the alerts' buffer and the next one */
#define DEFAULT_ALERT_SERIALIZATION_INTERVAL 3600
/** Default interval in seconds (1200 s = 20 minutes) between two alerts (a,b) for considering them correlated */
#define DEFAULT_BAYESIAN_CORRELATION_INTERVAL 1200
/** Default interval in seconds between an invocation of the thread for parsing XML manual correlations and the next one */
#define DEFAULT_MANUAL_CORRELATIONS_PARSING_INTERVAL 120
/** Default interval in seconds (43200 s = 12 hours) between a training loop for the
 * neural network for alert correlations and the next one (this value should usually be high) */
#define DEFAULT_NEURAL_NETWORK_TRAINING_INTERVAL 43200
/** Default interval in seconds between an execution of the thread that attempts to cluster
 * the output layer of the neural network searching for alerts belonging to the same
 * attack scenario and the next one */
#define DEFAULT_NEURAL_CLUSTERING_INTERVAL 1200
/** Default interval of validity in seconds for an entry in the cache of correlated alerts */
#define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600
/** Default maximum interval, in seconds (14400 s = 4 hours), between two alerts for being considered in the same cluster */
#define DEFAULT_CLUSTER_MAX_ALERT_INTERVAL 14400
/** Default number of neurons per side on the output matrix of the SOM neural network */
#define DEFAULT_OUTPUT_NEURONS_PER_SIDE 20
/** Default number of steps used for training the neural network */
#define DEFAULT_NEURAL_TRAIN_STEPS 10
/** Default maximum number of packets that an observed stream in the hash table should hold */
#define DEFAULT_MAX_HASH_PKT_NUMBER 1000
/** Default number of alerts needed in the history file or database for letting a certain
 * heuristic correlation index weight be =~ 0.95 (the weight monotonically increases
 * with the number of alerts according to a hyperbolic tangent function) */
#define DEFAULT_ALERT_CORRELATION_WEIGHT 5000
/** Default setting for the use of the knowledge base alert correlation index
 * (0 = do not use, 1 or any value != 0: use) */
#define DEFAULT_USE_KNOWLEDGE_BASE_CORRELATION_INDEX 1
/** Default setting for the use of the hash table for holding streams of packets
 * associated to a certain alert (0 = do not use, 1 or any value != 0: use) */
#define DEFAULT_USE_STREAM_HASH_TABLE 1
/** Default web server port */
#define DEFAULT_WEBSERV_PORT 7654
/** Default web server banner */
#define DEFAULT_WEBSERV_BANNER "Snort AIPreprocessor module"
/** Cutoff y value in the exponential decay for considering two alerts not correlated */
#define CUTOFF_Y_VALUE 0.01
/** Approximate solution x of the equation tanh(x) = 0.95 (used as parameter in the correlation indexes weight function) */
#define HYPERBOLIC_TANGENT_SOLUTION 1.83178
/****************************/
/* Database support: HAVE_DB is defined as soon as at least one of the
 * backend macros (HAVE_LIBMYSQLCLIENT, HAVE_LIBPQ) is defined */
#if defined(HAVE_LIBMYSQLCLIENT) || defined(HAVE_LIBPQ)
#define HAVE_DB 1
#endif
/****************************/
/** Declared here, defined in another translation unit; its type comes from
 * sf_dynamic_preprocessor.h (presumably the Snort dynamic preprocessor API
 * table - confirm against the Snort headers) */
extern DynamicPreprocessorData _dpd;
/* Hand-rolled fixed-width integer aliases.
 * NOTE(review): these reuse the <stdint.h> names; they only agree with
 * <stdint.h> on platforms where uint8_t/uint16_t/uint32_t are exactly
 * unsigned char/short/int. Consider relying on <stdint.h> instead. */
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
/** Boolean type: false == 0, true == 1.
 * NOTE(review): the enumerators `false` and `true` would clash with the
 * <stdbool.h> macros if that header is included before this one. */
typedef enum { false, true } BOOL;
/*****************************************************************/
/**
 * Possible types of clustering attributes.
 * CLUSTER_TYPES is not an attribute itself: being the last enumerator, its
 * value equals the number of enumerators before it, and it is used to size
 * per-attribute arrays (see the h_node member of AI_snort_alert).
 */
typedef enum {
    none,
    src_addr,
    dst_addr,
    src_port,
    dst_port,
    CLUSTER_TYPES
} cluster_type;
/*****************************************************************/
/**
 * Key of a stream in the hash table: every stream is identified by the
 * couple (src_ip, dst_port).
 * NOTE(review): a 32-bit member followed by a 16-bit one usually leaves
 * trailing padding in the struct; if the whole struct is hashed as a key,
 * it should be zeroed before being filled in - confirm against the callers.
 */
struct pkt_key
{
    uint32_t src_ip;
    uint16_t dst_port;
};
/*****************************************************************/
/**
 * Identifier of a packet in a stream.
 * Packets of the same stream are chained through `next`; the embedded
 * UT_hash_handle makes the struct usable as a uthash table entry.
 */
struct pkt_info
{
    /** Key of the packet (src_ip, dst_port) */
    struct pkt_key key;

    /** Timestamp */
    time_t timestamp;

    /** Reference to the SFSnortPacket containing the packet's information */
    SFSnortPacket *pkt;

    /** Pointer to the next packet in the stream */
    struct pkt_info *next;

    /** Flag set if the packet is observed, i.e. associated to a security alert */
    BOOL observed;

    /** Number of packets in the current flow, if available */
    unsigned int n_packets;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
};
/*****************************************************************/
/**
 * Data type containing the configuration of the module.
 * All the *Interval / *Validity members are expressed in seconds.
 */
typedef struct
{
    /* ---------- Timing of the worker threads ---------- */

    /** Interval in seconds for the stream cleanup thread */
    unsigned long hashCleanupInterval;

    /** Interval in seconds for considering an idle stream timed out */
    unsigned long streamExpireInterval;

    /** Interval in seconds for the alert clustering thread */
    unsigned long alertClusteringInterval;

    /** Interval in seconds for reading the alert database, if database logging is used */
    unsigned long databaseParsingInterval;

    /** Interval in seconds for running the thread for building alert correlation graphs */
    unsigned long correlationGraphInterval;

    /** Interval in seconds between a serialization of the alerts' buffer and the next one */
    unsigned long alertSerializationInterval;

    /** Interval in seconds between two alerts (a,b) for considering them correlated */
    unsigned long bayesianCorrelationInterval;

    /** Maximum interval, in seconds, between two alerts for being considered in the same cluster */
    unsigned long clusterMaxAlertInterval;

    /** Interval in seconds between an invocation of the thread for parsing
     * XML manual correlations and the next one */
    unsigned long manualCorrelationsParsingInterval;

    /** Interval in seconds between an execution of the thread that attempts to cluster
     * the output layer of the neural network searching for alerts belonging to the same
     * attack scenario and the next one */
    unsigned long neuralClusteringInterval;

    /** Interval in seconds for which an entry in the cache of correlated alerts is valid */
    unsigned long bayesianCorrelationCacheValidity;

    /** Interval in seconds between a training loop for the neural network for
     * alert correlations and the next one (this value should usually be high) */
    unsigned long neuralNetworkTrainingInterval;

    /* ---------- Sizes, counters and switches ---------- */

    /** Number of neurons per side on the output matrix of the SOM neural network */
    unsigned long outputNeuronsPerSide;

    /** Number of alerts needed in the history file or database for letting a certain
     * heuristic correlation index weight be =~ 0.95 (the weight monotonically increases
     * with the number of alerts according to a hyperbolic tangent function) */
    unsigned long alert_correlation_weight;

    /** Maximum number of packets that an observed stream in the hash table should hold */
    unsigned long max_hash_pkt_number;

    /** Number of steps used for training the neural network */
    unsigned long neural_train_steps;

    /** Size of the alerts' buffer to be periodically sent to the serialization thread */
    unsigned long alert_bufsize;

    /** Setting for the use of the knowledge base correlation index
     * (0 = do not use, 1 or any value != 0: use) */
    unsigned long use_knowledge_base_correlation_index;

    /** Setting for the use of the hash table for holding streams of packets
     * associated to a certain alert (0 = do not use, 1 or any value != 0: use) */
    unsigned long use_stream_hash_table;

    /** Correlation threshold coefficient for correlating two hyperalerts. Two hyperalerts
     * are 'correlated' to each other in a multi-step attack graph if and only if their
     * correlation value is >= m + ks, where m is the average correlation coefficient,
     * s is the standard deviation over this coefficient, and k is this threshold
     * coefficient. Its value can be >= 0. A value in [0,1] is strongly suggested,
     * but this value mostly depends on how accurately the correlation rules were
     * defined. Be careful: with a correlation coefficient > or >> 1, no correlation
     * may occur at all! */
    double correlationThresholdCoefficient;

    /* ---------- Web interface ---------- */

    /** Port the webserver providing the web interface for the correlation graph
     * will listen on */
    unsigned short webserv_port;

    /** (Absolute) path to the directory containing the HTML files for the web interface */
    char webserv_dir[1024];

    /** Banner string of the web server (this will be placed in the 'Server' HTTP header
     * and in the footer of error pages) */
    char webserv_banner[1024];

    /* ---------- Files and directories ---------- */

    /** Directory containing extra correlation modules */
    char corr_modules_dir[1024];

    /** Alert file */
    char alertfile[1024];

    /** Alert history binary file */
    char alert_history_file[1024];

    /** Clustered alerts file */
    char clusterfile[1024];

    /** Correlation rules path */
    char corr_rules_dir[1024];

    /** Directory where the correlated alerts' information will be placed */
    char corr_alerts_dir[1024];

    /** File keeping the serialized neural network used for the alert correlation */
    char netfile[1024];

    /** File containing the likely clusters computed over the output layer of the neural network */
    char neural_clusters_log[1024];

    /* ---------- Input database (used if database logging is used) ---------- */

    /** Database name, if database logging is used */
    char dbname[256];

    /** Database user, if database logging is used */
    char dbuser[256];

    /** Database password, if database logging is used */
    char dbpass[256];

    /** Database host, if database logging is used */
    char dbhost[256];

    /* ---------- Output database (used if clustered alerts and
     * correlations are saved to a database as well) ---------- */

    /** Output database type; OUTDBTYPE_NUM is the number of values before it */
    enum { outdb_none, outdb_mysql, outdb_postgresql, OUTDBTYPE_NUM } outdbtype;

    /** Output database name */
    char outdbname[256];

    /** Output database user */
    char outdbuser[256];

    /** Output database password */
    char outdbpass[256];

    /** Output database host */
    char outdbhost[256];
} AI_config;
/*****************************************************************/
/**
 * Data type for hierarchies used for clustering.
 * Nodes are linked upwards through `parent` and downwards through the
 * `children` array, which holds `nchildren` pointers.
 */
typedef struct _hierarchy_node
{
    /** Clustering attribute this node belongs to */
    cluster_type type;

    /** Label of the node */
    char label[256];

    /* NOTE(review): presumably the bounds of the value range covered by
     * this node - confirm against the hierarchy-building code */
    int min_val;
    int max_val;

    /** Number of entries in `children` */
    int nchildren;

    struct _hierarchy_node *parent;
    struct _hierarchy_node **children;
} hierarchy_node;
/*****************************************************************/
/**
 * Key for the hyperalert hash table: the (gid, sid, rev) triple.
 */
typedef struct
{
    unsigned int gid;
    unsigned int sid;
    unsigned int rev;
} AI_hyperalert_key;
/*****************************************************************/
/**
 * Entry of the hyperalert hash table: pre-conditions and post-conditions
 * attached to a hyperalert key.
 */
typedef struct
{
    /** Hyperalert key */
    AI_hyperalert_key key;

    /** Pre-conditions, as array of strings */
    char **preconds;

    /** Number of pre-conditions */
    unsigned int n_preconds;

    /** Post-conditions, as array of strings */
    char **postconds;

    /** Number of post-conditions */
    unsigned int n_postconds;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_hyperalert_info;
/*****************************************************************/
/**
 * Data type for Snort alerts.
 * Alerts are chained in a singly linked list through `next`; the remaining
 * pointer members link an alert to its stream, its clustering hierarchies,
 * the alerts it groups and its parent/derived alerts in a correlation chain.
 */
typedef struct _AI_snort_alert
{
    /* Identifiers of the alert */
    unsigned int gid;
    unsigned int sid;
    unsigned int rev;

    /* Snort priority, description, classification
     * and timestamp of the alert */
    unsigned short priority;
    char *desc;
    char *classification;
    time_t timestamp;

    /* IP header information */
    uint8_t ip_tos;
    uint16_t ip_len;
    uint16_t ip_id;
    uint8_t ip_ttl;
    uint8_t ip_proto;
    uint32_t ip_src_addr;
    uint32_t ip_dst_addr;

    /* TCP header information */
    uint16_t tcp_src_port;
    uint16_t tcp_dst_port;
    uint32_t tcp_seq;
    uint32_t tcp_ack;
    uint8_t tcp_flags;
    uint16_t tcp_window;
    uint16_t tcp_len;

    /** Reference to the TCP stream associated to the alert, if any */
    struct pkt_info *stream;

    /** Pointer to the next alert in the log, if any */
    struct _AI_snort_alert *next;

    /** Hierarchies for addresses and ports, if the clustering algorithm
     * is used (one slot per cluster_type value) */
    hierarchy_node *h_node[CLUSTER_TYPES];

    /** If the clustering algorithm is used, keeps track of the pointers
     * to the single grouped alerts */
    struct _AI_snort_alert **grouped_alerts;

    /** If the clustering algorithm is used, counts how many alerts
     * this single alert groups */
    unsigned int grouped_alerts_count;

    /** Hyperalert information, pre-conditions and post-conditions */
    AI_hyperalert_info *hyperalert;

    /** Latitude and longitude of the attacker IP, if available */
    double geocoord[2];

    /** Parent alerts in the chain, if any */
    struct _AI_snort_alert **parent_alerts;

    /** Number of parent alerts */
    unsigned int n_parent_alerts;

    /** Array of directly correlated 'derived' alerts
     * from the current one, if any */
    struct _AI_snort_alert **derived_alerts;

    /** Number of derived alerts */
    unsigned int n_derived_alerts;

    /** Alert ID on the database, if the alerts are stored
     * on a database as well */
    unsigned long int alert_id;
} AI_snort_alert;
/*****************************************************************/
/**
 * Key for the AI_alert_event structure, containing the Snort ID
 * (gid, sid, rev) of the alert.
 */
typedef struct
{
    int gid;
    int sid;
    int rev;
} AI_alert_event_key;
/*****************************************************************/
/**
 * Structure representing the historical information of an alert saved in
 * alert_history. Entries can be chained through `next` and stored in a
 * uthash table through `hh`.
 */
typedef struct _AI_alert_event
{
    /** Snort ID of the alert this entry refers to */
    AI_alert_event_key key;

    unsigned int count;
    time_t timestamp;

    /** Next entry in the list, if any */
    struct _AI_alert_event *next;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alert_event;
/*****************************************************************/
/**
 * Simple structure for holding a couple of alerts to be merged,
 * to be passed to the outdb thread.
 */
typedef struct
{
    AI_snort_alert *alert1;
    AI_snort_alert *alert2;
} AI_alerts_couple;
/*****************************************************************/
/**
 * Key for the correlation hash table: the couple of alerts (a, b).
 */
typedef struct
{
    /** First alert */
    AI_snort_alert *a;

    /** Second alert */
    AI_snort_alert *b;
} AI_alert_correlation_key;
/*****************************************************************/
/**
 * Struct representing the correlation between all the couples of alerts:
 * each entry maps a couple of alerts to its correlation coefficient.
 */
typedef struct
{
    /** Hash key */
    AI_alert_correlation_key key;

    /** Correlation coefficient */
    double correlation;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alert_correlation;
/*****************************************************************/
/**
 * Expresses an alert as a numerical tuple manageable by a neural network.
 */
typedef struct
{
    /* Identifiers of the alert */
    unsigned int gid;
    unsigned int sid;
    unsigned int rev;

    /* Addresses and ports */
    uint32_t src_ip_addr;
    uint32_t dst_ip_addr;
    uint16_t src_port;
    uint16_t dst_port;

    time_t timestamp;
    char *desc;
} AI_som_alert_tuple;
/*****************************************************************/
/**
 * Key for the AI_alerts_per_neuron hash table: the (x, y) couple.
 */
typedef struct
{
    int x;
    int y;
} AI_alerts_per_neuron_key;
/*****************************************************************/
/**
 * Struct that holds, for each point of the output layer, the list of
 * associated alerts, for easily performing the clustering algorithm.
 */
typedef struct
{
    /** Point of the output layer */
    AI_alerts_per_neuron_key key;

    /** Alerts associated to the point */
    AI_som_alert_tuple *alerts;

    /* NOTE(review): presumably the number of entries in `alerts` - confirm */
    int n_alerts;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alerts_per_neuron;
/*****************************************************************/
/**
 * Hash table holding analyzed geographical IP info.
 */
typedef struct
{
    /** IP address as a string (buffer of INET_ADDRSTRLEN bytes) */
    char ip[INET_ADDRSTRLEN];

    /* NOTE(review): presumably latitude and longitude, as for the geocoord
     * member of AI_snort_alert - confirm */
    double geocoord[2];

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_geoip_cache;
/*****************************************************************/
/**
 * Key of an AI_alert_type_pair: two (gid, sid, rev) triples, one for the
 * 'from' alert type and one for the 'to' alert type.
 */
typedef struct
{
    /* 'from' alert type */
    int from_gid;
    int from_sid;
    int from_rev;

    /* 'to' alert type */
    int to_gid;
    int to_sid;
    int to_rev;
} AI_alert_type_pair_key;
/*****************************************************************/
/**
 * Hashable entry associating a pair of alert types to a correlation type
 * (manuallyNone, manuallyCorrelated or manuallyNotCorrelated).
 */
typedef struct
{
    /** Pair of alert types */
    AI_alert_type_pair_key key;

    /** Correlation type of the pair */
    enum { manuallyNone, manuallyCorrelated, manuallyNotCorrelated } corr_type;

    /** Make the struct 'hashable' */
    UT_hash_handle hh;
} AI_alert_type_pair;
/*****************************************************************/
/**
 * Enumeration for describing the tables in the output database.
 * N_TABLES is not a table: it is the number of tables listed before it.
 */
enum {
    ALERTS_TABLE,
    IPV4_HEADERS_TABLE,
    TCP_HEADERS_TABLE,
    PACKET_STREAMS_TABLE,
    CLUSTERED_ALERTS_TABLE,
    CORRELATED_ALERTS_TABLE,
    N_TABLES
};
/**
 * Names of the tables in the output database, indexed by the enumeration
 * above. The array is explicitly sized with N_TABLES so that it always has
 * one slot per enumerator: listing more names than tables is rejected at
 * compile time, and a table added without a name yields a NULL entry
 * instead of an out-of-bounds read.
 */
static const char *outdb_config[N_TABLES] __attribute__ (( unused )) = {
    "ca_alerts",
    "ca_ipv4_headers",
    "ca_tcp_headers",
    "ca_packet_streams",
    "ca_clustered_alerts",
    "ca_correlated_alerts"
};
/*
 * The unused attribute is needed for gcc to avoid raising a warning
 * of "unused variable" when compiling with -Wall -pedantic -pedantic-errors,
 * since this array is declared here but only used in two source files
 */
/*****************************************************************/
/*
 * Prototypes of the functions shared among the source files of the module.
 * The definitions are not in this header: the grouping comments below are
 * inferred from names and signatures only and should be confirmed against
 * the implementations.
 */
/* Generic string, regex and base64 helpers */
int preg_match ( const char*, char*, char***, int* );
char* str_replace ( char*, char*, char *);
char* str_replace_all ( char*, char*, char* );
void base64_encode ( const char*, size_t, char** );
void base64_decode ( const char*, char** );
/* Error reporting: takes a message plus a file name and a line number */
void AI_fatal_err ( const char *msg, const char *file, const int line );
/* Functions with the `void* f ( void* )` shape match the signature of a
 * pthread start routine (presumably passed to pthread_create - confirm) */
void* AI_hashcleanup_thread ( void* );
void* AI_file_alertparser_thread ( void* );
void* AI_alert_correlation_thread ( void* );
void* AI_webserv_thread ( void* );
/* Only declared when a database backend is available (HAVE_DB) */
#ifdef HAVE_DB
AI_snort_alert* AI_db_get_alerts ( void );
void AI_db_free_alerts ( AI_snort_alert* );
void* AI_db_alertparser_thread ( void* );
#endif
/* Packet streams, hierarchies and alert lists */
void AI_pkt_enqueue ( SFSnortPacket* );
void AI_set_stream_observed ( struct pkt_key key );
void AI_hierarchies_build ( hierarchy_node**, int );
void AI_free_alerts ( AI_snort_alert *node );
void AI_init_corr_modules ( void );
struct pkt_info* AI_get_stream_by_key ( struct pkt_key );
AI_snort_alert* AI_get_alerts ( void );
AI_snort_alert* AI_get_clustered_alerts ( void );
/* Alert (de)serialization and periodic threads */
void AI_serialize_alerts ( AI_snort_alert**, unsigned int );
void AI_serializer ( AI_snort_alert* );
void* AI_deserialize_alerts ( void );
void* AI_alerts_pool_thread ( void* );
void* AI_neural_thread ( void* );
void* AI_manual_correlations_parsing_thread ( void* );
void* AI_neural_clustering_thread ( void* );
/* Alert history lookups */
const AI_alert_event* AI_get_alert_events_by_key ( AI_alert_event_key );
unsigned int AI_get_history_alert_number ( void );
/* Correlation indexes between two alerts, and their weights; all of them
 * return a double */
double AI_alert_bayesian_correlation ( const AI_snort_alert*, const AI_snort_alert* );
double AI_alert_neural_som_correlation ( const AI_snort_alert*, const AI_snort_alert* );
double AI_kb_correlation_coefficient ( const AI_snort_alert*, const AI_snort_alert* );
double AI_neural_correlation_weight ( void );
double AI_bayesian_correlation_weight ( void );
/* Geographical information about an address given as a string */
int AI_geoinfobyaddr ( const char*, double** );
/* Output database */
void AI_outdb_mutex_initialize ( void );
void AI_store_alert_to_db ( AI_snort_alert* );
void AI_store_cluster_to_db ( AI_alerts_couple* );
void AI_store_correlation_to_db ( AI_alert_correlation* );
void AI_kb_index_init ( AI_snort_alert* );
AI_alerts_per_neuron* AI_get_alerts_per_neuron ( void );
/* Both functions below take a size_t* and return a pointer to an array of
 * function pointers: `double (*)(const AI_snort_alert*, const AI_snort_alert*)`
 * for the first one, `double (*)(void)` for the second one. The size_t*
 * presumably receives the number of elements - confirm. */
double(**AI_get_corr_functions ( size_t* ))(const AI_snort_alert*, const AI_snort_alert*);
double(**AI_get_corr_weights ( size_t* ))( void );
/* Python counterparts, only declared when built with HAVE_LIBPYTHON2_6 */
#ifdef HAVE_LIBPYTHON2_6
PyObject** AI_get_py_functions ( size_t* );
PyObject** AI_get_py_weights ( size_t* );
PyObject* AI_alert_to_pyalert ( AI_snort_alert* );
#endif
/* Global objects shared among the source files: they are only declared here
 * (extern) and must be defined in exactly one source file. */
/** Function pointer to the function used for getting the alert list (from log file, db, ...) */
extern AI_snort_alert* (*get_alerts)(void);
/** Buffer containing the alerts to be serialized on the binary history file */
extern AI_snort_alert **alerts_pool;
/** Number of alerts contained in the buffer to be serialized */
extern unsigned int alerts_pool_count;
/** Mutex variable for writing on the output database */
extern pthread_mutex_t outdb_mutex;
/** Configuration of the module */
extern AI_config *config;
/** NOTE(review): presumably the hash table of the alert type pairs manually
 * marked as correlated (see corr_type in AI_alert_type_pair) - confirm */
extern AI_alert_type_pair *manual_correlations;
/** NOTE(review): presumably the hash table of the alert type pairs manually
 * marked as NOT correlated (see corr_type in AI_alert_type_pair) - confirm */
extern AI_alert_type_pair *manual_uncorrelations;
#endif /* _SPP_AI_H */