diff --git a/Makefile.am b/Makefile.am index f41ed75..339c817 100644 --- a/Makefile.am +++ b/Makefile.am @@ -25,10 +25,12 @@ bayesian.c \ cluster.c \ correlation.c \ db.c \ +fkmeans/kmeans.c \ fsom/fsom.c \ modules.c \ mysql.c \ neural.c \ +neural_cluster.c \ outdb.c \ postgresql.c \ regex.c \ diff --git a/Makefile.in b/Makefile.in index e482ec3..0643603 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,8 +84,10 @@ am_libsf_ai_preproc_la_OBJECTS = libsf_ai_preproc_la-alert_history.lo \ libsf_ai_preproc_la-cencode.lo libsf_ai_preproc_la-bayesian.lo \ libsf_ai_preproc_la-cluster.lo \ libsf_ai_preproc_la-correlation.lo libsf_ai_preproc_la-db.lo \ - libsf_ai_preproc_la-fsom.lo libsf_ai_preproc_la-modules.lo \ - libsf_ai_preproc_la-mysql.lo libsf_ai_preproc_la-neural.lo \ + libsf_ai_preproc_la-kmeans.lo libsf_ai_preproc_la-fsom.lo \ + libsf_ai_preproc_la-modules.lo libsf_ai_preproc_la-mysql.lo \ + libsf_ai_preproc_la-neural.lo \ + libsf_ai_preproc_la-neural_cluster.lo \ libsf_ai_preproc_la-outdb.lo libsf_ai_preproc_la-postgresql.lo \ libsf_ai_preproc_la-regex.lo libsf_ai_preproc_la-spp_ai.lo \ libsf_ai_preproc_la-stream.lo libsf_ai_preproc_la-webserv.lo @@ -267,10 +269,12 @@ bayesian.c \ cluster.c \ correlation.c \ db.c \ +fkmeans/kmeans.c \ fsom/fsom.c \ modules.c \ mysql.c \ neural.c \ +neural_cluster.c \ outdb.c \ postgresql.c \ regex.c \ @@ -416,6 +420,9 @@ libsf_ai_preproc_la-correlation.lo: correlation.c libsf_ai_preproc_la-db.lo: db.c $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-db.lo `test -f 'db.c' || echo '$(srcdir)/'`db.c +libsf_ai_preproc_la-kmeans.lo: fkmeans/kmeans.c + $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-kmeans.lo `test -f 
'fkmeans/kmeans.c' || echo '$(srcdir)/'`fkmeans/kmeans.c + libsf_ai_preproc_la-fsom.lo: fsom/fsom.c $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-fsom.lo `test -f 'fsom/fsom.c' || echo '$(srcdir)/'`fsom/fsom.c @@ -428,6 +435,9 @@ libsf_ai_preproc_la-mysql.lo: mysql.c libsf_ai_preproc_la-neural.lo: neural.c $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-neural.lo `test -f 'neural.c' || echo '$(srcdir)/'`neural.c +libsf_ai_preproc_la-neural_cluster.lo: neural_cluster.c + $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-neural_cluster.lo `test -f 'neural_cluster.c' || echo '$(srcdir)/'`neural_cluster.c + libsf_ai_preproc_la-outdb.lo: outdb.c $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libsf_ai_preproc_la_CFLAGS) $(CFLAGS) -c -o libsf_ai_preproc_la-outdb.lo `test -f 'outdb.c' || echo '$(srcdir)/'`outdb.c diff --git a/fkmeans/Doxyfile b/fkmeans/Doxyfile new file mode 100644 index 0000000..f52d4b2 --- /dev/null +++ b/fkmeans/Doxyfile @@ -0,0 +1,1630 @@ +# Doxyfile 1.7.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] 
+# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = fkmeans + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 0.1 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. 
Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. 
+# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. 
This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = YES + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that act +# as commands in the documentation. An alias has the form "name=value". 
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. 
Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); vs. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. 
+ +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). 
The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. 
+ +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. 
If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. 
If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. 
+ +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. 
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. 
This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). 
+ +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command FILTER INPUT-FILE, where +# FILTER is the value of the INPUT_FILTER tag, and INPUT-FILE is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. 
Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the stylesheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. + +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. 
+ +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox, +# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When the GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When the GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. 
+ +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. 
+ +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters +# (Qt Help Project / Custom Filters). + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. For more information please see +# http://doc.trolltech.com/qthelpproject.html#filter-attributes +# (Qt Help Project / Filter Attributes). + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. 
After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. 
Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantage is that it is more difficult to set up +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. 
+ +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). 
The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. 
+ +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. 
Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). 
+ +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. 
+ +DOT_NUM_THREADS = 0 + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. You need need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans.ttf + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. 
+ +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. 
+ +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. 
Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/fkmeans/Makefile b/fkmeans/Makefile new file mode 100644 index 0000000..03aaac3 --- /dev/null +++ b/fkmeans/Makefile @@ -0,0 +1,3 @@ +all: + gcc -g -O3 -Wall -pedantic -pedantic-errors -std=c99 -o kmeans-test test.c kmeans.c -lm + diff --git a/fkmeans/README b/fkmeans/README new file mode 100644 index 0000000..cac6eb4 --- /dev/null +++ b/fkmeans/README @@ -0,0 +1,88 @@ +fkmeans is a tiny C library that allows you to perform k-means clustering +algorithm over arbitrary sets of n-dimensional data. All you need to do is: + +- Include the file kmeans.h in your sources; + +- Consider your data set as a vector of vectors of double items (double**), + where each vector is an n-dimensional item of your data set; + +- If you want to perform the k-means algorithm over your data and you already + know the number k of clusters there contained, or its estimate, you want to + execute some code like this (in this example, the data set is 3-dimensional, + i.e. it contains N vectors whose size is 3, and we know it contains n_clus + clusters): + + kmeans_t *km; + double **dataset; + ... 
+ km = kmeans_new ( dataset, N, 3, n_clus ); + kmeans ( km ); + ... + kmeans_free ( km ); + + If you don't already know the number of clusters contained in your data set, + you can use the function kmeans_auto() for automatically attempting to find + the best one using Schwarz's criterion. Be careful, this operation can be very + slow, especially if executed on data sets having many elements. The example + above would simply become something like: + + kmeans_t *km; + double **dataset; + ... + km = kmeans_auto ( dataset, N, 3 ); + ... + kmeans_free ( km ); + +- Once the clustering has been performed, the clusters of data can be simply + accessed from your kmeans_t* structure, as they are held as a double*** field + named "clusters". Each vector in this structure represents a cluster, whose + size is specified in the field cluster_sizes[i] of the structure. Each cluster + contains the items that form it, each of which is an n-dimensional vector. The + number of clusters is specified in the field "k" of the structure, the + number of dimensions of each element is specified in the field "dataset_dim" + and the number of elements in the original data set is specified in the field + "dataset_size". So, for example: + + for ( i=0; i < km->k; i++ ) + { + printf ( "cluster %d: [ ", i ); + + for ( j=0; j < km->cluster_sizes[i]; j++ ) + { + printf ( "(" ); + + for ( k=0; k < km->dataset_dim; k++ ) + { + printf ( "%f, ", km->clusters[i][j][k] ); + } + + printf ( "), "); + } + + printf ( "]\n" ); + } + + The library however already comes with a sample implementation, contained in + "test.c", and typing "make" this example will be built. This example takes 0, + 1, 2 or 3 command-line arguments, in format + + $ ./kmeans-test [num_elements] [min_value] [max_value] + + and randomly generates a 2-dimensional data set containing num_elements, whose + coordinates are between min_value and max_value.
The clustering is then + performed and the results are shown on stdout, with the clusters coloured in + different ways; + +- After you write your source, remember to include the file "kmeans.c", + containing the implementation of the library, in the list of your sources + files; + +- That's all. Include "kmeans.h", write your code using + kmeans_new()+kmeans()+kmeans_free() or kmeans_auto()+kmeans_free(), explore + your clusters, remember to include "kmeans.c" in the list of your source + files, and you're ready for k-means clustering. + +Author: Fabio "BlackLight" Manganiello, + , + http://0x00.ath.cx + diff --git a/fkmeans/kmeans.c b/fkmeans/kmeans.c new file mode 100644 index 0000000..1dd5f58 --- /dev/null +++ b/fkmeans/kmeans.c @@ -0,0 +1,445 @@ +/* + * ===================================================================================== + * + * Filename: kmeans.c + * + * Description: k-means clusterization algorithm implementation in C + * + * Version: 1.0 + * Created: 12/11/2010 10:43:28 + * Revision: none + * Compiler: gcc + * + * Author: BlackLight (http://0x00.ath.cx), + * Licence: GNU GPL v.3 + * Company: DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE! 
+ * + * ===================================================================================== + */ + +#include "kmeans.h" + +#include +#include +#include +#include +#include +#include
+
+/**
+ * \brief  Free the clusters built by a previous step and reset every cluster size to zero
+ * \param  km 	k-means object (its clusters and cluster_sizes fields may still be NULL)
+ */
+
+static void
+__kmeans_reset_clusters ( kmeans_t *km )
+{
+	int i, j;
+
+	if ( !km->clusters || !km->cluster_sizes )
+	{
+		return;
+	}
+
+	for ( i=0; i < km->k; i++ )
+	{
+		if ( km->clusters[i] )
+		{
+			for ( j=0; j < km->cluster_sizes[i]; j++ )
+			{
+				free ( km->clusters[i][j] );
+			}
+
+			free ( km->clusters[i] );
+			km->clusters[i] = NULL;
+		}
+
+		km->cluster_sizes[i] = 0;
+	}
+}		/* -----  end of function __kmeans_reset_clusters  ----- */
+
+/**
+ * \brief  Initialize the centers of the clusters taking the K most distant elements in the dataset
+ *
+ * The first center is the element with the largest squared norm. Every further
+ * center is the not yet chosen element whose sum of squared distances from the
+ * centers picked so far is the largest. When no candidate has a positive
+ * distance (e.g. K is larger than the dataset) element 0 is used.
+ *
+ * \param  km 	k-means object
+ * \return 0 on success, -1 if the list of chosen indexes could not be allocated
+ */
+
+static int
+__kmeans_init_centers ( kmeans_t *km )
+{
+	int i, j, k, l,
+	    index_found = 0,
+	    max_index = 0,
+	    assigned_centers = 0,
+	    *assigned_centers_indexes = NULL;
+
+	double dist = 0.0,
+	       delta = 0.0,
+	       max_dist = 0.0;
+
+	/* At most K indexes are recorded, so one allocation replaces the realloc-per-center growth */
+	if ( !( assigned_centers_indexes = (int*) calloc ( km->k, sizeof ( int ))))
+	{
+		return -1;
+	}
+
+	for ( i=0; i < km->dataset_size; i++ )
+	{
+		dist = 0.0;
+
+		for ( j=0; j < km->dataset_dim; j++ )
+		{
+			dist += ( km->dataset[i][j] ) * ( km->dataset[i][j] );
+		}
+
+		if ( dist > max_dist )
+		{
+			max_dist = dist;
+			max_index = i;
+		}
+	}
+
+	for ( i=0; i < km->dataset_dim; i++ )
+	{
+		km->centers[0][i] = km->dataset[max_index][i];
+	}
+
+	assigned_centers_indexes[ assigned_centers++ ] = max_index;
+
+	for ( i=1; i < km->k; i++ )
+	{
+		max_dist = 0.0;
+		max_index = 0;
+
+		for ( j=0; j < km->dataset_size; j++ )
+		{
+			/* Skip the elements that are already the center of a cluster */
+			index_found = 0;
+
+			for ( k=0; k < assigned_centers && !index_found; k++ )
+			{
+				if ( assigned_centers_indexes[k] == j )
+				{
+					index_found = 1;
+				}
+			}
+
+			if ( index_found )
+				continue;
+
+			dist = 0.0;
+
+			for ( k=0; k < assigned_centers; k++ )
+			{
+				for ( l=0; l < km->dataset_dim; l++ )
+				{
+					delta = km->dataset[j][l] - km->centers[k][l];
+					dist += delta * delta;
+				}
+			}
+
+			if ( dist > max_dist )
+			{
+				max_dist = dist;
+				max_index = j;
+			}
+		}
+
+		for ( j=0; j < km->dataset_dim; j++ )
+		{
+			km->centers[i][j] = km->dataset[max_index][j];
+		}
+
+		assigned_centers_indexes[ assigned_centers++ ] = max_index;
+	}
+
+	free ( assigned_centers_indexes );
+	return 0;
+}		/* -----  end of function __kmeans_init_centers  ----- */
+
+/**
+ * \brief  Create a new k-means object
+ *
+ * The dataset is deep-copied, so the caller keeps the ownership of its own
+ * buffers. The returned object must be released through kmeans_free().
+ *
+ * \param  dataset 	Dataset to be clustered
+ * \param  dataset_size 	Number of elements in the dataset (must be > 0)
+ * \param  dataset_dim 	Dimension of each element of the dataset (must be > 0)
+ * \param  K 		Number of clusters (must be > 0)
+ * \return Reference to the newly created k-means object, if successful, NULL
+ *         on invalid arguments or allocation failure (nothing is leaked)
+ */
+
+kmeans_t*
+kmeans_new ( double **dataset, const int dataset_size, const int dataset_dim, const int K )
+{
+	int i, j;
+	kmeans_t *km = NULL;
+
+	if ( !dataset || dataset_size <= 0 || dataset_dim <= 0 || K <= 0 )
+	{
+		return NULL;
+	}
+
+	/* calloc leaves every pointer field NULL, so kmeans_free() can unwind a partially built object */
+	if ( !( km = (kmeans_t*) calloc ( 1, sizeof ( kmeans_t ))))
+	{
+		return NULL;
+	}
+
+	km->dataset_size = dataset_size;
+	km->dataset_dim  = dataset_dim;
+	km->k = K;
+
+	if ( !( km->dataset = (double**) calloc ( dataset_size, sizeof ( double* ))))
+	{
+		goto fail;
+	}
+
+	for ( i=0; i < dataset_size; i++ )
+	{
+		if ( !( km->dataset[i] = (double*) calloc ( dataset_dim, sizeof ( double ))))
+		{
+			goto fail;
+		}
+
+		for ( j=0; j < dataset_dim; j++ )
+		{
+			km->dataset[i][j] = dataset[i][j];
+		}
+	}
+
+	if ( !( km->clusters = (double***) calloc ( K, sizeof ( double** ))))
+	{
+		goto fail;
+	}
+
+	/* cluster_sizes is an array of int, not of int* */
+	if ( !( km->cluster_sizes = (int*) calloc ( K, sizeof ( int ))))
+	{
+		goto fail;
+	}
+
+	if ( !( km->centers = (double**) calloc ( K, sizeof ( double* ))))
+	{
+		goto fail;
+	}
+
+	for ( i=0; i < K; i++ )
+	{
+		if ( !( km->centers[i] = (double*) calloc ( dataset_dim, sizeof ( double ))))
+		{
+			goto fail;
+		}
+	}
+
+	if ( __kmeans_init_centers ( km ) < 0 )
+	{
+		goto fail;
+	}
+
+	return km;
+
+fail:
+	kmeans_free ( km );
+	return NULL;
+}		/* -----  end of function kmeans_new  ----- */
+
+/**
+ * \brief  Function that performs a single step for k-means algorithm
+ *
+ * Every element is assigned to its closest center (ties go to the lowest
+ * index), then each center is moved to the mean of its cluster. The center of
+ * a cluster that received no element is set to the origin.
+ *
+ * \param  km 	k-means object
+ * \return 0 if no changes were performed by this step, 1 otherwise, -1 in case of error
+ */
+
+static int
+__kmeans_step ( kmeans_t *km )
+{
+	int i, j, k,
+	    best_center = 0,
+	    changed = 0;
+
+	double dist = 0.0,
+	       delta = 0.0,
+	       min_dist = DBL_MAX,
+	       *old_centers = NULL,
+	       *item = NULL,
+	       **grown = NULL;
+
+	/* Always start from empty clusters: testing only clusters[0] would keep
+	 * stale items in the other clusters whenever cluster 0 ends up empty */
+	__kmeans_reset_clusters ( km );
+
+	/* Flat k x dim snapshot on the heap: alloca cannot report a failure and
+	 * may overflow the stack for large values of k */
+	if ( !( old_centers = (double*) malloc ( (size_t) km->k * (size_t) km->dataset_dim * sizeof ( double ))))
+	{
+		return -1;
+	}
+
+	for ( i=0; i < km->k; i++ )
+	{
+		for ( j=0; j < km->dataset_dim; j++ )
+		{
+			old_centers[ (size_t) i * km->dataset_dim + j ] = km->centers[i][j];
+		}
+	}
+
+	for ( i=0; i < km->dataset_size; i++ )
+	{
+		min_dist = DBL_MAX;
+		best_center = 0;
+
+		for ( j=0; j < km->k; j++ )
+		{
+			dist = 0.0;
+
+			for ( k=0; k < km->dataset_dim; k++ )
+			{
+				delta = km->dataset[i][k] - km->centers[j][k];
+				dist += delta * delta;
+			}
+
+			if ( dist < min_dist )
+			{
+				min_dist = dist;
+				best_center = j;
+			}
+		}
+
+		if ( !( item = (double*) calloc ( km->dataset_dim, sizeof ( double ))))
+		{
+			free ( old_centers );
+			return -1;
+		}
+
+		/* Grow through a temporary so the cluster is still owned by km if realloc fails */
+		if ( !( grown = (double**) realloc ( km->clusters[best_center],
+				((size_t) km->cluster_sizes[best_center] + 1 ) * sizeof ( double* ))))
+		{
+			free ( item );
+			free ( old_centers );
+			return -1;
+		}
+
+		for ( j=0; j < km->dataset_dim; j++ )
+		{
+			item[j] = km->dataset[i][j];
+		}
+
+		km->clusters[best_center] = grown;
+		km->clusters[best_center][ km->cluster_sizes[best_center]++ ] = item;
+	}
+
+	for ( i=0; i < km->k; i++ )
+	{
+		for ( j=0; j < km->dataset_dim; j++ )
+		{
+			km->centers[i][j] = 0.0;
+
+			for ( k=0; k < km->cluster_sizes[i]; k++ )
+			{
+				km->centers[i][j] += km->clusters[i][k][j];
+			}
+
+			if ( km->cluster_sizes[i] != 0 )
+			{
+				km->centers[i][j] /= (double) km->cluster_sizes[i];
+			}
+		}
+	}
+
+	for ( i=0; i < km->k && !changed; i++ )
+	{
+		for ( j=0; j < km->dataset_dim && !changed; j++ )
+		{
+			if ( km->centers[i][j] != old_centers[ (size_t) i * km->dataset_dim + j ] )
+			{
+				changed = 1;
+			}
+		}
+	}
+
+	free ( old_centers );
+	return changed;
+}		/* -----  end of function __kmeans_step  ----- */
+
+/**
+ * \brief  Perform the k-means algorithm over a k-means object
+ *
+ * Steps are repeated until the centers stop moving. The loop also stops when
+ * a step reports an error (-1), instead of retrying it forever.
+ *
+ * \param  km 	k-means object
+ */
+
+void
+kmeans ( kmeans_t *km )
+{
+	if ( !km )
+	{
+		return;
+	}
+
+	while ( __kmeans_step ( km ) > 0 );
+}		/* -----  end of function kmeans  ----- */
+
+/**
+ * \brief  Compute the heuristic coefficient associated to the current number of clusters through Schwarz's criterion
+ * \param  km 	k-means object
+ * \return Real value expressing how well that number of clusters models the dataset
+ *         (distortion + k * log(dataset_size), the lower the better)
+ */
+
+static double
+__kmeans_heuristic_coefficient ( kmeans_t *km )
+{
+	int i, j, k;
+	double delta = 0.0,
+	       distorsion = 0.0;
+
+	for ( i=0; i < km->k; i++ )
+	{
+		for ( j=0; j < km->cluster_sizes[i]; j++ )
+		{
+			for ( k=0; k < km->dataset_dim; k++ )
+			{
+				delta = km->centers[i][k] - km->clusters[i][j][k];
+				distorsion += delta * delta;
+			}
+		}
+	}
+
+	return distorsion + km->k * log ( km->dataset_size );
+}		/* -----  end of function __kmeans_heuristic_coefficient  ----- */
+
+/**
+ * \brief  Remove a k-means object
+ * \param  km 	k-means object to be deallocated; NULL and partially
+ *              constructed objects are accepted
+ */
+
+void
+kmeans_free ( kmeans_t *km )
+{
+	int i;
+
+	if ( !km )
+	{
+		return;
+	}
+
+	__kmeans_reset_clusters ( km );
+	free ( km->clusters );
+	km->clusters = NULL;
+
+	free ( km->cluster_sizes );
+	km->cluster_sizes = NULL;
+
+	if ( km->centers )
+	{
+		for ( i=0; i < km->k; i++ )
+		{
+			free ( km->centers[i] );
+		}
+
+		free ( km->centers );
+		km->centers = NULL;
+	}
+
+	if ( km->dataset )
+	{
+		for ( i=0; i < km->dataset_size; i++ )
+		{
+			free ( km->dataset[i] );
+		}
+
+		free ( km->dataset );
+		km->dataset = NULL;
+	}
+
+	free ( km );
+}		/* -----  end of function kmeans_free  ----- */
+
+/**
+ * \brief  Perform a k-means clustering over a dataset automatically choosing the best value of k using Schwarz's criterion
+ *
+ * Every k in [1, dataset_size] is tried and the object with the lowest
+ * heuristic coefficient is kept, so the cost grows quickly with the size of
+ * the dataset.
+ *
+ * \param  dataset 	Dataset to be clustered
+ * \param  dataset_size 	Number of elements in the dataset
+ * \param  dataset_dim 	Dimension of each element of the dataset
+ * \return Reference to the newly created k-means object, if successful, NULL otherwise
+ */
+
+kmeans_t*
+kmeans_auto ( double **dataset, int dataset_size, int dataset_dim )
+{
+	int i;
+
+	double heuristic = 0.0,
+	       best_heuristic = DBL_MAX;
+
+	kmeans_t *km = NULL,
+		    *best_km = NULL;
+
+	for ( i=1; i <= dataset_size; i++ )
+	{
+		if ( !( km = kmeans_new ( dataset, dataset_size, dataset_dim, i )))
+		{
+			/* Do not leak the best candidate found so far */
+			kmeans_free ( best_km );
+			return NULL;
+		}
+
+		kmeans ( km );
+		heuristic = __kmeans_heuristic_coefficient ( km );
+
+		if ( heuristic < best_heuristic )
+		{
+			kmeans_free ( best_km );
+			best_km = km;
+			best_heuristic = heuristic;
+		} else {
+			kmeans_free ( km );
+		}
+	}
+
+	return best_km;
+}		/* -----  end of function kmeans_auto  ----- */
+
diff --git a/fkmeans/kmeans.h b/fkmeans/kmeans.h new file mode 100644 index 0000000..67bdd7d --- /dev/null +++ b/fkmeans/kmeans.h @@ -0,0 +1,52 @@ +/* + * ===================================================================================== + * + * Filename: kmeans.h + * + * Description: Header file for C k-means implementation + * + * Version: 1.0 + * Created: 12/11/2010 10:43:55 + * Revision: none + * Compiler: gcc + * + * Author: BlackLight (http://0x00.ath.cx), + * Licence: GNU GPL v.3 + * Company: DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
+ * + * ===================================================================================== + */ + +#ifndef __KMEANS_H +#define __KMEANS_H + +typedef struct __kmeans_t { + /** Input data set */ + double **dataset; + + /** Number of elements in the data set */ + int dataset_size; + + /** Dimension of each element of the data set */ + int dataset_dim; + + /** Number of clusters */ + int k; + + /** Vector containing the number of elements in each cluster */ + int *cluster_sizes; + + /** Clusters */ + double ***clusters; + + /** Coordinates of the centers of the clusters */ + double **centers; +} kmeans_t; + +kmeans_t* kmeans_new ( double **dataset, const int dataset_size, const int dataset_dim, const int K ); +kmeans_t* kmeans_auto ( double **dataset, int dataset_size, int dataset_dim ); +void kmeans ( kmeans_t *km ); +void kmeans_free ( kmeans_t *km ); + +#endif + diff --git a/mysql.c b/mysql.c index 9a6631d..ec0c56d 100644 --- a/mysql.c +++ b/mysql.c @@ -48,18 +48,26 @@ __mysql_do_init ( MYSQL **__DB, BOOL is_out ) return (void*) *__DB; if ( !( *__DB = (MYSQL*) malloc ( sizeof ( MYSQL )))) + { return NULL; + } if ( !( mysql_init ( *__DB ))) + { return NULL; + } if ( is_out ) { if ( !mysql_real_connect ( *__DB, config->outdbhost, config->outdbuser, config->outdbpass, NULL, 0, NULL, 0 )) + { return NULL; + } if ( mysql_select_db ( *__DB, config->outdbname )) + { return NULL; + } } else { if ( !mysql_real_connect ( *__DB, config->dbhost, config->dbuser, config->dbpass, NULL, 0, NULL, 0 )) return NULL; diff --git a/neural.c b/neural.c index 14a157c..9f31c74 100644 --- a/neural.c +++ b/neural.c @@ -37,21 +37,22 @@ /** Enumeration for the input fields of the SOM neural network */ enum { som_src_ip, som_dst_ip, som_src_port, som_dst_port, som_time, som_gid, som_sid, som_rev, SOM_NUM_ITEMS }; -typedef struct { - unsigned int gid; - unsigned int sid; - unsigned int rev; - uint32_t src_ip_addr; - uint32_t dst_ip_addr; - uint16_t src_port; - uint16_t dst_port; - time_t 
timestamp; -} AI_som_alert_tuple; - -PRIVATE time_t latest_serialization_time = ( time_t ) 0; -PRIVATE som_network_t *net = NULL; +PRIVATE time_t latest_serialization_time = ( time_t ) 0; +PRIVATE som_network_t *net = NULL; +PRIVATE AI_alerts_per_neuron *alerts_per_neuron = NULL; PRIVATE pthread_mutex_t neural_mutex; +/** + * \brief Get the hash table containing the alerts associated to each output neuron + * \return The hash table + */ + +AI_alerts_per_neuron* +AI_get_alerts_per_neuron () +{ + return alerts_per_neuron; +} /* ----- end of function AI_get_alerts_per_neuron ----- */ + /** * \brief Get the current weight of the neural correlation index using a hyperbolic tangent function with a parameter expressed in function of the current number of alerts in the database * \return The weight of the correlation index ( 0 <= weight < 1 ) @@ -126,6 +127,11 @@ __AI_som_alert_distance ( const AI_som_alert_tuple alert1, const AI_som_alert_tu x2 = 0, y2 = 0; + int i; + BOOL is_found = false; + AI_alerts_per_neuron *found = NULL; + AI_alerts_per_neuron_key key; + if ( !( input1 = (double*) alloca ( SOM_NUM_ITEMS * sizeof ( double )))) { AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); @@ -136,24 +142,128 @@ __AI_som_alert_distance ( const AI_som_alert_tuple alert1, const AI_som_alert_tu AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); } - pthread_mutex_lock ( &neural_mutex ); - if ( !net ) { - pthread_mutex_unlock ( &neural_mutex ); return 0.0; } __AI_alert_to_som_data ( alert1, &input1 ); + __AI_alert_to_som_data ( alert2, &input2 ); + + pthread_mutex_lock ( &neural_mutex ); + som_set_inputs ( net, input1 ); som_get_best_neuron_coordinates ( net, &x1, &y1 ); - __AI_alert_to_som_data ( alert2, &input2 ); som_set_inputs ( net, input2 ); som_get_best_neuron_coordinates ( net, &x2, &y2 ); pthread_mutex_unlock ( &neural_mutex ); + /* Check if there are already entries in the hash table for these two neurons, otherwise + 
* it creates them and append these two alerts */ + key.x = x1; + key.y = y1; + HASH_FIND ( hh, alerts_per_neuron, &key, sizeof ( key ), found ); + + if ( !found ) + { + if ( !( found = (AI_alerts_per_neuron*) calloc ( 1, sizeof ( AI_alerts_per_neuron )))) + { + AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } + + found->key = key; + found->n_alerts = 1; + + if ( !( found->alerts = (AI_som_alert_tuple*) calloc ( 1, sizeof ( AI_som_alert_tuple )))) + { + AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } + + found->alerts[0] = alert1; + HASH_ADD ( hh, alerts_per_neuron, key, sizeof ( key ), found ); + } else { + is_found = false; + + for ( i=0; i < found->n_alerts && !is_found; i++ ) + { + if ( + alert1.gid == found->alerts[i].gid && + alert1.sid == found->alerts[i].sid && + alert1.rev == found->alerts[i].rev && + alert1.src_ip_addr == found->alerts[i].src_ip_addr && + alert1.dst_ip_addr == found->alerts[i].dst_ip_addr && + alert1.src_port == found->alerts[i].src_port && + alert1.dst_port == found->alerts[i].dst_port ) + { + is_found = true; + } + } + + if ( !is_found ) + { + if ( !( found->alerts = (AI_som_alert_tuple*) realloc ( found->alerts, + (++(found->n_alerts)) * sizeof ( AI_som_alert_tuple )))) + { + AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } + + found->alerts[ found->n_alerts - 1 ] = alert1; + } + } + + key.x = x2; + key.y = y2; + HASH_FIND ( hh, alerts_per_neuron, &key, sizeof ( key ), found ); + + if ( !found ) + { + if ( !( found = (AI_alerts_per_neuron*) calloc ( 1, sizeof ( AI_alerts_per_neuron )))) + { + AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } + + found->key = key; + found->n_alerts = 1; + + if ( !( found->alerts = (AI_som_alert_tuple*) calloc ( 1, sizeof ( AI_som_alert_tuple )))) + { + AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } + + found->alerts[0] = alert2; + HASH_ADD 
( hh, alerts_per_neuron, key, sizeof ( key ), found ); + } else { + is_found = false; + + for ( i=0; i < found->n_alerts && !is_found; i++ ) + { + if ( + alert2.gid == found->alerts[i].gid && + alert2.sid == found->alerts[i].sid && + alert2.rev == found->alerts[i].rev && + alert2.src_ip_addr == found->alerts[i].src_ip_addr && + alert2.dst_ip_addr == found->alerts[i].dst_ip_addr && + alert2.src_port == found->alerts[i].src_port && + alert2.dst_port == found->alerts[i].dst_port ) + { + is_found = true; + } + } + + if ( !is_found ) + { + if ( !( found->alerts = (AI_som_alert_tuple*) realloc ( found->alerts, + (++(found->n_alerts)) * sizeof ( AI_som_alert_tuple )))) + { + AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ ); + } + + found->alerts[ found->n_alerts - 1 ] = alert2; + } + } + /* Return the normalized euclidean distance in [0,1] (the normalization is made considering that the maximum distance * between two points on the output neurons matrix is the distance between the upper-left and bottom-right points) */ return sqrt ((double) ( (x2-x1)*(x2-x1) + (y2-y1)*(y2-y1) )) / @@ -170,9 +280,7 @@ __AI_som_alert_distance ( const AI_som_alert_tuple alert1, const AI_som_alert_tu double AI_alert_neural_som_correlation ( const AI_snort_alert *a, const AI_snort_alert *b ) { - size_t i = 0; - unsigned long long int time_sum = 0; - AI_som_alert_tuple t1, t2; + AI_som_alert_tuple t1, t2; t1.gid = a->gid; t1.sid = a->sid; @@ -181,18 +289,7 @@ AI_alert_neural_som_correlation ( const AI_snort_alert *a, const AI_snort_alert t1.dst_ip_addr = ntohl ( a->ip_dst_addr ); t1.src_port = ntohs ( a->tcp_src_port ); t1.dst_port = ntohs ( a->tcp_dst_port ); - time_sum = (unsigned long long int) a->timestamp; - - /* The timestamp of this alert is computed like the average timestamp of the grouped alerts */ - for ( i=1; i < a->grouped_alerts_count; i++ ) - { - if ( a->grouped_alerts[i-1] ) - { - time_sum += (unsigned long long int) a->grouped_alerts[i-1]->timestamp; 
- } - } - - t1.timestamp = (time_t) ( time_sum / a->grouped_alerts_count ); + t1.timestamp = a->timestamp; t2.gid = b->gid; t2.sid = b->sid; @@ -201,17 +298,7 @@ AI_alert_neural_som_correlation ( const AI_snort_alert *a, const AI_snort_alert t2.dst_ip_addr = ntohl ( b->ip_dst_addr ); t2.src_port = ntohs ( b->tcp_src_port ); t2.dst_port = ntohs ( b->tcp_dst_port ); - time_sum = (unsigned long long int) b->timestamp; - - for ( i=1; i < b->grouped_alerts_count; i++ ) - { - if ( b->grouped_alerts[i-1] ) - { - time_sum += (unsigned long long int) b->grouped_alerts[i-1]->timestamp; - } - } - - t2.timestamp = (time_t) ( time_sum / b->grouped_alerts_count ); + t2.timestamp = b->timestamp; return __AI_som_alert_distance ( t1, t2 ); } /* ----- end of function AI_alert_neural_som_correlation ----- */ @@ -338,8 +425,9 @@ __AI_som_train () void* AI_neural_thread ( void *arg ) { - BOOL do_train = false; struct stat st; + BOOL do_train = false; + pthread_t neural_clustering_thread; pthread_mutex_init ( &neural_mutex, NULL ); @@ -353,6 +441,14 @@ AI_neural_thread ( void *arg ) AI_fatal_err ( "AIPreproc: neural network thread launched but netfile option was not specified", __FILE__, __LINE__ ); } + if ( config->neuralClusteringInterval != 0 ) + { + if ( pthread_create ( &neural_clustering_thread, NULL, AI_neural_clustering_thread, NULL ) != 0 ) + { + AI_fatal_err ( "Failed to create the manual correlations parsing thread", __FILE__, __LINE__ ); + } + } + while ( 1 ) { if ( stat ( config->netfile, &st ) < 0 ) diff --git a/neural_cluster.c b/neural_cluster.c new file mode 100644 index 0000000..6925f0c --- /dev/null +++ b/neural_cluster.c @@ -0,0 +1,194 @@ +/* + * ===================================================================================== + * + * Filename: neural_cluster.c + * + * Description: Perform the clusterization over the output layer of the SOM neural + * network, in order to attempt to find the alerts belonging to the + * same attack scenario. 
The clusterization is operated through k-means
+ * using Schwarz criterion in order to find the optimal number of
+ * clusters, the implementation is in fkmeans/
+ *
+ * Version: 0.1
+ * Created: 19/11/2010 18:37:35
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: BlackLight (http://0x00.ath.cx),
+ * Licence: GNU GPL v.3
+ * Company: DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
+ *
+ * =====================================================================================
+ */
+
+#include "spp_ai.h"
+
+/** \defgroup neural_cluster Module for clustering the alerts associated to the
+ * neural network output layer in order to find alerts belonging to the same scenario
+ * @{ */
+
+#include "fkmeans/kmeans.h"
+
+#include
+#include
+#include
+
+/**
+ * \brief Print the clusters associated to the SOM output to an XML log file, writing each distinct alert of a neuron once
+ * \param km k-means object
+ * \param alerts_per_neuron Hash table containing the alerts associated to each neuron
+ */
+
+PRIVATE void
+__AI_neural_clusters_to_xml ( kmeans_t *km, AI_alerts_per_neuron *alerts_per_neuron )
+{
+	int i, j, k, l, are_equal;
+	FILE *fp = NULL;
+
+	uint32_t src_addr = 0,
+			 dst_addr = 0;
+
+	char src_ip[INET_ADDRSTRLEN] = { 0 },
+		 dst_ip[INET_ADDRSTRLEN] = { 0 };
+
+	AI_alerts_per_neuron_key key;
+	AI_alerts_per_neuron *alert_iterator = NULL;
+
+	if ( !( fp = fopen ( config->neural_clusters_log, "w" )))
+	{
+		AI_fatal_err ( "Unable to write on the neural clusters XML log file", __FILE__, __LINE__ );
+	}
+
+	fprintf ( fp, "\n\n"
+		"\n" );
+
+	for ( i=0; i < km->k; i++ )
+	{
+		fprintf ( fp, "\t\n", i );
+
+		for ( j=0; j < km->cluster_sizes[i]; j++ )
+		{
+			key.x = km->clusters[i][j][0];
+			key.y = km->clusters[i][j][1];
+			HASH_FIND ( hh, alerts_per_neuron, &key, sizeof ( key ), alert_iterator );
+
+			if ( alert_iterator )
+			{
+				for ( k=0; k < alert_iterator->n_alerts; k++ )
+				{
+					are_equal = 0;
+
+					for ( l=0; l < alert_iterator->n_alerts && !are_equal; l++ )
+					{
+						if ( l < k ) /* compare with earlier entries only, so the first copy of a duplicate is still written */
+						{
+							if (
+									alert_iterator->alerts[k].gid == alert_iterator->alerts[l].gid &&
+									alert_iterator->alerts[k].sid == alert_iterator->alerts[l].sid &&
+									alert_iterator->alerts[k].rev == alert_iterator->alerts[l].rev &&
+									alert_iterator->alerts[k].src_ip_addr == alert_iterator->alerts[l].src_ip_addr &&
+									alert_iterator->alerts[k].dst_ip_addr == alert_iterator->alerts[l].dst_ip_addr &&
+									alert_iterator->alerts[k].src_port == alert_iterator->alerts[l].src_port &&
+									alert_iterator->alerts[k].dst_port == alert_iterator->alerts[l].dst_port &&
+									alert_iterator->alerts[k].timestamp == alert_iterator->alerts[l].timestamp )
+							{
+								are_equal = 1;
+							}
+						}
+					}
+
+					if ( !are_equal )
+					{
+						src_addr = htonl ( alert_iterator->alerts[k].src_ip_addr );
+						dst_addr = htonl ( alert_iterator->alerts[k].dst_ip_addr );
+						inet_ntop ( AF_INET, &src_addr, src_ip, INET_ADDRSTRLEN );
+						inet_ntop ( AF_INET, &dst_addr, dst_ip, INET_ADDRSTRLEN );
+
+						fprintf ( fp, "\t\t\n",
+							alert_iterator->alerts[k].gid,
+							alert_iterator->alerts[k].sid,
+							alert_iterator->alerts[k].rev,
+							src_ip, alert_iterator->alerts[k].src_port,
+							dst_ip, alert_iterator->alerts[k].dst_port,
+							alert_iterator->alerts[k].timestamp,
+							alert_iterator->key.x, alert_iterator->key.y );
+					}
+				}
+			}
+		}
+
+		fprintf ( fp, "\t\n" );
+	}
+
+	fprintf ( fp, "\n" );
+	fclose ( fp );
+} /* ----- end of function __AI_neural_clusters_to_xml ----- */
+
+/**
+ * \brief Thread that performs the k-means clustering over the output layer of
+ * the SOM neural network, sleeping config->neuralClusteringInterval seconds between runs
+ */
+
+void*
+AI_neural_clustering_thread ( void *arg )
+{
+	AI_alerts_per_neuron *alerts_per_neuron = NULL,
+						 *alert_iterator = NULL;
+
+	kmeans_t *km = NULL;
+	double **dataset = NULL;
+	int i, dataset_size = 0;
+
+	while ( 1 )
+	{
+		dataset = NULL;
+		dataset_size = 0;
+		alerts_per_neuron = AI_get_alerts_per_neuron();
+
+		for ( alert_iterator = alerts_per_neuron; alert_iterator; alert_iterator = (AI_alerts_per_neuron*) alert_iterator->hh.next )
+		{
+			if ( alert_iterator->n_alerts > 0 ) /* only neurons holding at least one alert become data points */
+			{
+				if ( !( dataset = (double**) realloc ( dataset, (++dataset_size) * sizeof ( double* ))))
+				{
+					AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
+				}
+
+				if ( !( dataset[dataset_size-1] = (double*) calloc ( 2, sizeof ( double ))))
+				{
+					AI_fatal_err ( "Fatal dynamic memory allocation error", __FILE__, __LINE__ );
+				}
+
+				dataset[dataset_size-1][0] = (double) alert_iterator->key.x;
+				dataset[dataset_size-1][1] = (double) alert_iterator->key.y;
+			}
+		}
+
+		if ( dataset && dataset_size != 0 )
+		{
+			if ( !( km = kmeans_auto ( dataset, dataset_size, 2 )))
+			{
+				AI_fatal_err ( "Unable to initialize the k-means clustering object", __FILE__, __LINE__ );
+			}
+
+			__AI_neural_clusters_to_xml ( km, alerts_per_neuron );
+			kmeans_free ( km );
+
+			for ( i=0; i < dataset_size; i++ )
+			{
+				free ( dataset[i] );
+			}
+
+			free ( dataset );
+		}
+
+		sleep ( config->neuralClusteringInterval );
+	}
+
+	pthread_exit ((void*) 0);
+	return (void*) 0;
+} /* ----- end of function AI_neural_clustering_thread ----- */
+
+/** @} */
+
diff --git a/spp_ai.c b/spp_ai.c
index d76d15a..9935e09 100644
--- a/spp_ai.c
+++ b/spp_ai.c
@@ -172,14 +172,14 @@ static AI_config * AI_parse(char *args)
 {
 	char *arg;
 	char *match;
-	char alertfile[1024] = { 0 },
-	alert_history_file[1024] = { 0 },
-	clusterfile[1024] = { 0 },
-	corr_alerts_dir[1024] = { 0 },
-	corr_modules_dir[1024] = { 0 },
-	corr_rules_dir[1024] = { 0 },
-	webserv_dir[1024] = { 0 },
-	webserv_banner[1024] = { 0 };
+	char alertfile[1024] = { 0 },
+	alert_history_file[1024] = { 0 },
+	clusterfile[1024] = { 0 },
+	corr_alerts_dir[1024] = { 0 },
+	corr_modules_dir[1024] = { 0 },
+	corr_rules_dir[1024] = { 0 },
+	webserv_dir[1024] = { 0 },
+	webserv_banner[1024] = { 0 };
 
 	char **matches = NULL;
 	int nmatches = 0;
@@ -217,6 +217,7 @@ static AI_config * AI_parse(char *args)
 	correlation_graph_interval = 0,
 	database_parsing_interval = 0,
 	manual_correlations_parsing_interval = 0,
+	neural_clustering_interval = 0,
neural_network_training_interval = 0, neural_train_steps = 0, output_neurons_per_side = 0, @@ -526,6 +527,27 @@ static AI_config * AI_parse(char *args) config->neuralNetworkTrainingInterval = neural_network_training_interval; _dpd.logMsg( " Neural network training interval: %u\n", config->neuralNetworkTrainingInterval ); + /* Parsing the neural_clustering_interval option */ + if (( arg = (char*) strcasestr( args, "neural_clustering_interval" ) )) + { + for ( arg += strlen("neural_clustering_interval"); + *arg && (*arg < '0' || *arg > '9'); + arg++ ); + + if ( !(*arg) ) + { + AI_fatal_err ( "neural_clustering_interval option used but " + "no value specified", __FILE__, __LINE__ ); + } + + neural_clustering_interval = strtoul ( arg, NULL, 10 ); + } else { + neural_clustering_interval = DEFAULT_NEURAL_CLUSTERING_INTERVAL; + } + + config->neuralClusteringInterval = neural_clustering_interval; + _dpd.logMsg( " Neural network clustering interval: %u\n", config->neuralClusteringInterval ); + /* Parsing the output_neurons_per_side option */ if (( arg = (char*) strcasestr( args, "output_neurons_per_side" ) )) { @@ -796,6 +818,9 @@ static AI_config * AI_parse(char *args) _dpd.logMsg(" webserv_dir: %s\n", config->webserv_dir); + snprintf ( config->neural_clusters_log, sizeof ( config->neural_clusters_log ), "%s/neural_clusters.xml", config->webserv_dir ); + _dpd.logMsg(" neural_clusters_log: %s\n", config->neural_clusters_log); + /* Parsing the corr_modules_dir option */ if (( arg = (char*) strcasestr( args, "corr_modules_dir" ) )) { diff --git a/spp_ai.h b/spp_ai.h index da12f85..1a65a7b 100644 --- a/spp_ai.h +++ b/spp_ai.h @@ -81,6 +81,11 @@ * alert correlations and the next one (this value should usually be high) */ #define DEFAULT_NEURAL_NETWORK_TRAINING_INTERVAL 43200 +/** Default interval in seconds between an execution of the thread that attempts to cluster + * the output layer of the neural network searching for alerts belonging to the same + * attack scenario and the 
next one */ +#define DEFAULT_NEURAL_CLUSTERING_INTERVAL 1200 + /** Default interval of validity in seconds for an entry in the cache of correlated alerts */ #define DEFAULT_BAYESIAN_CORRELATION_CACHE_VALIDITY 600 @@ -193,6 +198,11 @@ typedef struct /** Interval in seconds between an invocation of the thread for parsing XML manual correlations and the next one */ unsigned long manualCorrelationsParsingInterval; + /** Interval in seconds between an execution of the thread that attempts to cluster + * the output layer of the neural network searching for alerts belonging to the same + * attack scenario and the next one */ + unsigned long neuralClusteringInterval; + /** Interval in seconds for which an entry in the cache of correlated alerts is valid */ unsigned long bayesianCorrelationCacheValidity; @@ -256,6 +266,9 @@ typedef struct /** File keeping the serialized neural network used for the alert correlation */ char netfile[1024]; + /** File containing the likely clusters computed over the output layer of the neural network */ + char neural_clusters_log[1024]; + /** Database name, if database logging is used */ char dbname[256]; @@ -451,6 +464,34 @@ typedef struct { UT_hash_handle hh; } AI_alert_correlation; /*****************************************************************/ +/** Expresses an alert as a numerical tuple manageable by a neural network */ +typedef struct { + unsigned int gid; + unsigned int sid; + unsigned int rev; + uint32_t src_ip_addr; + uint32_t dst_ip_addr; + uint16_t src_port; + uint16_t dst_port; + time_t timestamp; +} AI_som_alert_tuple; +/*****************************************************************/ +/** Key for the AI_alerts_per_neuron hash table */ +typedef struct { + int x; + int y; +} AI_alerts_per_neuron_key; +/*****************************************************************/ +/** Struct that holds, for each point of the output layer, the list of associated alerts + * for easily performing the clustering algorithm */ +typedef struct 
{ + AI_alerts_per_neuron_key key; + AI_som_alert_tuple *alerts; + int n_alerts; + UT_hash_handle hh; +} AI_alerts_per_neuron; +/*****************************************************************/ + /** Enumeration for describing the table in the output database */ enum { ALERTS_TABLE, IPV4_HEADERS_TABLE, TCP_HEADERS_TABLE, PACKET_STREAMS_TABLE, CLUSTERED_ALERTS_TABLE, CORRELATED_ALERTS_TABLE, N_TABLES }; @@ -513,6 +554,8 @@ void AI_outdb_mutex_initialize (); void* AI_store_alert_to_db_thread ( void* ); void* AI_store_cluster_to_db_thread ( void* ); void* AI_store_correlation_to_db_thread ( void* ); +void* AI_neural_clustering_thread ( void* ); +AI_alerts_per_neuron* AI_get_alerts_per_neuron (); double(**AI_get_corr_functions ( size_t* ))(const AI_snort_alert*, const AI_snort_alert*); double(**AI_get_corr_weights ( size_t* ))();