Big optimizations, now Lambert function uses static vectors of mus and alphas.

2024-12-27 01:25:10 +01:00 · 2010-10-31 18:06:29 +01:00 · 2010-10-31 18:06:29 +01:00 · 20b76cb928
commit 20b76cb928
parent a4eaf22c08
4 changed files with 174 additions and 95 deletions
--- a/2
+++ b/2
@ -1,4 +1,4 @@
 all:
-	gcc -w *.c -o fsom_example
+	gcc -w -O3 -pipe -fomit-frame-pointer -ffast-math *.c -o fsom_example
 clean:
 	rm -f *.o fsom_example
--- a/example.c
+++ b/example.c
@ -1,7 +1,65 @@
+/*
+ * =====================================================================================
+ *
+ *       Filename:  example.c
+ *
+ *    Description:  Examle file to benchmark fsom library.
+ *
+ *        Version:  0.1
+ *       Revision:  none
+ *       Compiler:  gcc
+ *
+ *         Author:  BlackLight (http://0x00.ath.cx), <blacklight@autistici.org>
+ *    Contributor:  evilsocket (http://www.evilsocket.net), <evilsocket@gmail.com>
+ *        Licence:  GNU GPL v.3
+ *        Company:  DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
+ *
+ * =====================================================================================
+ */
 #include	"fsom.h"

 #include	<alloca.h>
 #include	<stdio.h>
+#include 	<sys/time.h>
+
+static unsigned long prev_uticks = 0;
+
+static unsigned long get_uticks(){
+    struct timeval ts;
+    gettimeofday(&ts,0);
+    return ((ts.tv_sec * 1000000) + ts.tv_usec);
+}
+
+static void start_timer(){
+	prev_uticks = get_uticks();
+}
+
+static void print_timing( const char *msg ){
+	#define MS_DELTA (1000.0)
+	#define SS_DELTA (MS_DELTA * 1000.0)
+	#define MM_DELTA (SS_DELTA * 60.0)
+	#define HH_DELTA (MM_DELTA * 60.0)
+
+	double ticks = get_uticks() - prev_uticks;
+
+	if( ticks < MS_DELTA ){
+	   printf( "%s\t : %lf us\n", msg, ticks );
+	}
+	else if( ticks < SS_DELTA ){
+	   printf( "%s\t : %lf ms\n", msg, ticks / MS_DELTA );
+	}
+	else if( ticks < MM_DELTA ){
+	   printf( "%s\t : %lf s\n", msg, ticks / SS_DELTA );
+	}
+	else if( ticks < HH_DELTA ){
+	   printf( "%s\t : %lf m\n", msg, ticks / MM_DELTA );
+	}
+	else{
+	   printf( "%s\t : %lf h\n", msg, ticks / HH_DELTA );
+	}
+
+	start_timer();
+}

 #define 	VECTORS 		10
 #define 	INPUTS 		    20
@ -29,6 +87,8 @@ main ( int argc, char *argv[] )
 		}
 	}

+	start_timer();
+
 	net = som_network_new ( INPUTS, OUT_ROWS, OUT_COLS );

 	if ( !net )
@ -37,9 +97,13 @@ main ( int argc, char *argv[] )
 		return 1;
 	}

+	print_timing( "Network Creation" );
+
 	som_init_weights ( net, data, INPUTS );
 	som_train ( net, data, VECTORS, TRAIN_STEPS );

+	print_timing( "Network Training" );
+
 	for ( i=0; i < INPUTS; ++i )
 	{
 		som_set_inputs ( net, data[i] );
@ -47,5 +111,9 @@ main ( int argc, char *argv[] )
 	//	printf ( "best coordinates [%u]: %u,%u\n", i, x, y );
 	}

+	print_timing( "Input Recognition" );
+
 	som_network_destroy ( net );
+
+	print_timing( "Network Destruction" );
 }			
--- a/fsom.c
+++ b/fsom.c
@ -11,6 +11,7 @@
 *       Compiler:  gcc
 *
 *         Author:  BlackLight (http://0x00.ath.cx), <blacklight@autistici.org>
+ *    Contributor:  evilsocket (http://www.evilsocket.net), <evilsocket@gmail.com>
 *        Licence:  GNU GPL v.3
 *        Company:  DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
 *
@ -81,41 +82,15 @@ som_synapsis_new ( som_neuron_t *input_neuron, som_neuron_t *output_neuron, doub
 * \brief  Create a new neuron
 * \return The new neuron
 */
-
-static som_neuron_t*
-som_neuron_new ()
-{
-	som_neuron_t  *neuron = NULL;
-
-	if ( !( neuron = ( som_neuron_t* ) malloc ( sizeof ( som_neuron_t ))))
-	{
-		return NULL;
-	}
-
-	neuron->output = 0.0;
-	neuron->input  = 0.0;
-	neuron->synapses = NULL;
-	neuron->synapses_count = 0;
-
-	return neuron;
-}		/* -----  end of function som_neuron_new  ----- */
+#define som_neuron_new() ( som_neuron_t *)calloc( 1, sizeof ( som_neuron_t ) )
+/* -----  end of macro som_neuron_new  ----- */

 /**
 * \brief  Deallocate a neuron
 * \param  neuron 	Neuron to be deallocated
 */
-
-static void
-som_neuron_destroy ( som_neuron_t *neuron )
-{
-	if ( !neuron )
-	{
-		return;
-	}
-
-	free ( neuron );
-	neuron = NULL;
-}		/* -----  end of function som_neuron_destroy  ----- */
+#define som_neuron_destroy( neuron ) if( neuron ){ free(neuron); neuron = NULL; }
+/* -----  end of macro som_neuron_destroy  ----- */

 /**
 * \brief  Create a new input layer
@ -151,7 +126,6 @@ som_input_layer_new ( size_t neurons_count )
 			for ( j=0; j < i; ++j )
 			{
 				som_neuron_destroy ( layer->neurons[j] );
-				layer->neurons[j] = NULL;
 			}

 			free ( layer->neurons );
@ -221,7 +195,6 @@ som_output_layer_new ( size_t neurons_rows, size_t neurons_cols )
 					for ( l=0; l < j; ++l )
 					{
 						som_neuron_destroy ( layer->neurons[k][l] );
-						layer->neurons[k][l] = NULL;
 					}

 					free ( layer->neurons[k] );
@ -279,13 +252,11 @@ som_network_new ( size_t input_neurons, size_t output_neurons_rows, size_t outpu
 	som_network_t *net = NULL;
 	srand ( time ( NULL ));

-	if ( !( net = ( som_network_t* ) malloc ( sizeof ( som_network_t ))))
+	if ( !( net = ( som_network_t* ) calloc ( 1, sizeof ( som_network_t ))))
 	{
 		return NULL;
 	}

-	memset ( net, 0, sizeof ( som_network_t ));
-
 	if ( !( net->input_layer = som_input_layer_new ( input_neurons )))
 	{
 		free ( net );
@ -359,7 +330,6 @@ som_input_layer_destroy ( som_network_t *net )
 		}

 		som_neuron_destroy ( net->input_layer->neurons[i] );
-		net->input_layer->neurons[i] = NULL;
 	}

 	free ( net->input_layer->neurons );
@ -406,7 +376,6 @@ som_output_layer_destroy ( som_network_t *net )
 			}

 			som_neuron_destroy ( net->output_layer->neurons[i][j] );
-			net->output_layer->neurons[i][j] = NULL;
 		}

 		free ( net->output_layer->neurons[i] );
@ -468,25 +437,27 @@ double
 som_get_best_neuron_coordinates ( som_network_t *net, size_t *x, size_t *y )
 {
 	size_t i = 0,
-		  j = 0,
-		  k = 0;
+		   j = 0,
+		   k = 0;

-	double mod = 0.0,
-		  best_dist = 0.0;
+	double mod 		 = 0.0,
+		   best_dist = DBL_MAX;
+
+	som_neuron_t *neuron;

 	for ( i=0; i < net->output_layer->neurons_rows; ++i )
 	{
 		for ( j=0; j < net->output_layer->neurons_cols; ++j )
 		{
-			mod = 0.0;
+			mod    = 0.0;
+			neuron = net->output_layer->neurons[i][j];

-			for ( k=0; k < net->output_layer->neurons[i][j]->synapses_count; ++k )
+			for ( k=0; k < neuron->synapses_count; ++k )
 			{
-				mod += ( net->input_layer->neurons[k]->input - net->output_layer->neurons[i][j]->synapses[k]->weight ) *
-					( net->input_layer->neurons[k]->input - net->output_layer->neurons[i][j]->synapses[k]->weight );
+				mod += pow( net->input_layer->neurons[k]->input - neuron->synapses[k]->weight, 2 );
 			}

-			if (( i == 0 && j == 0 ) || ( mod < best_dist ))
+			if ( mod < best_dist )
 			{
 				best_dist = mod;
 				*x = i;
@ -501,50 +472,44 @@ som_get_best_neuron_coordinates ( som_network_t *net, size_t *x, size_t *y )
 /**
 * \brief  Get the n-th approximated step of the analytic continuation of the Lambert W-function of a real number x (see "Numerical Evaluation of the Lambert W Function and Application to Generation of Generalized Gaussian Noise With Exponent 1/2" from Chapeau-Blondeau and Monir, IEEE Transactions on Signal Processing, vol.50, no.9, Sep.2002)
 * \param  x 	Input variable of which we're going to compute W[-1](x)
- * \param  n 	Number of steps in the series computation
 * \return W[-1](x)
 */

 static double
-lambert_W1_function ( double x, int n )
+lambert_W1_function ( som_network_t *net, double x )
 {
 	int j = 0,
 		k = 0;

-	double *alphas = NULL,
-		  *mus = NULL,
-		  p = 0.0,
-		  res = 0.0,
-		  k_plus = 0,
-		  k_less = 0;
-
-	if ( !( alphas = (double*) alloca ( (n+1) * sizeof ( double ))))
-		return 0.0;
-
-	if ( !( mus = (double*) alloca ( (n+1) * sizeof ( double ))))
-		return 0.0;
+	double p = 0.0,
+		   res = 0.0,
+		   k_plus = 0,
+		   k_less = 0;

 	p = - sqrt ( 2 * ( M_E * x + 1 ));

-	mus[0] 	  = -1;
-	mus[1] 	  = 1;
-	alphas[0] = 2;
-	alphas[1] = -1;
-	for ( k=2; k < n; ++k )
+	net->mus[0]    = -1;
+	net->mus[1]    = 1;
+	net->alphas[0] = 2;
+	net->alphas[1] = -1;
+	for ( k=2; k < TAYLOR_LAMBERT_LAST_ELEMENT; ++k )
 	{
-		alphas[k] = 0.0;
+		net->alphas[k] = 0.0;

 		for ( j=2; j < k; ++j )
 		{
-			alphas[k] += mus[j] * mus[ k - j + 1 ];
+			net->alphas[k] += net->mus[j] * net->mus[ k - j + 1 ];
 		}
 		
 		k_plus = k + 1;
 		k_less = k - 1;

-		mus[k] = (k_less / k_plus) * ( (mus[k-2] / 2.0) + (alphas[k-2] / 4.0) ) - ( alphas[k] / 2.0 ) - ( mus[(int)k_less] / k_plus );
+		net->mus[k] = (k_less / k_plus) *
+					  ( (net->mus[k-2] / 2.0) + ( net->alphas[k-2] / 4.0) ) -
+					  ( net->alphas[k] / 2.0 ) -
+					  ( net->mus[(int)k_less] / k_plus );

-		res += ( mus[k] * pow ( p, k ) );
+		res += ( net->mus[k] * pow ( p, k ) );
 	}

 	return res;
@ -572,7 +537,7 @@ som_learning_rate ( som_network_t* net, size_t t, double M, size_t N )
 	{
 		K = ( M * (double) N * M_E ) / 0.01;
 		W_arg = -((double) N ) / K;
-		W = lambert_W1_function ( W_arg, 1000 );
+		W = lambert_W1_function ( net, W_arg );
 		T = K * exp ( W );
 		net->T_learning_param = T;
 	} else {
@ -592,10 +557,11 @@ som_learning_rate ( som_network_t* net, size_t t, double M, size_t N )
 * \return Learning rate
 */

-static double
+INLINE double
 som_learning_rate_fast ( som_network_t* net, size_t t, double M, size_t N )
 {
-	return M * ( (double) t / net->T_learning_param) * exp ( 1 - ( (double) t / net->T_learning_param ));
+	double inv_lrate = (double) t / net->T_learning_param;
+	return M * inv_lrate * exp ( 1 - inv_lrate );
 }		/* -----  end of function som_learning_rate  ----- */

 /**
@ -802,48 +768,80 @@ som_init_weights ( som_network_t *net, double **data, size_t n_data )
 		}
 	}

+	size_t		 last_out_row = out_rows - 1;
+	som_neuron_t *j_neuron,
+				 *br_edge = net->output_layer->neurons[ last_out_row  ][ last_out_col ],
+				 *bl_edge = net->output_layer->neurons[ last_out_row ][0];
 	for ( j=1; j < out_cols - 1; ++j )
 	{
+		j_neuron  = net->output_layer->neurons[last_out_row][j];
+		a		  = (double)(j - 1)		   / (double)last_out_col;
+		b		  = (double)(out_cols - j) / (double)last_out_col;
+
 		for ( k=0; k < in_size; ++k )
 		{
-			net->output_layer->neurons[ out_rows - 1 ][j]->synapses[k]->weight =
-				( ((double) j - 1) / ((double) out_cols - 1 )) * net->output_layer->neurons[ out_rows - 1 ][ out_cols - 1 ]->synapses[k]->weight +
-				( (double) ( out_cols - j ) / ((double) out_cols - 1 )) * net->output_layer->neurons[ out_rows - 1 ][0]->synapses[k]->weight;
+			j_neuron->synapses[k]->weight = a * br_edge->synapses[k]->weight + b * bl_edge->synapses[k]->weight;
 		}
 	}

+	som_neuron_t *i_neuron,
+				 *tl_edge = net->output_layer->neurons[0][0];
 	for ( i=1; i < out_rows - 1; ++i )
 	{
+		i_neuron = net->output_layer->neurons[i][0];
+		a		 = ( ((double) i - 1) / (double) last_out_row );
+		b		 = ( (double) ( out_rows - i ) /  (double)last_out_row);
 		for ( k=0; k < in_size; ++k )
 		{
-			net->output_layer->neurons[i][0]->synapses[k]->weight =
-				( ((double) i - 1) / ((double) out_rows - 1 )) * net->output_layer->neurons[ out_rows-1 ][0]->synapses[k]->weight +
-				( (double) ( out_rows - i ) / ((double) out_rows - 1 )) * net->output_layer->neurons[0][0]->synapses[k]->weight;
+			i_neuron->synapses[k]->weight = a * bl_edge->synapses[k]->weight + b * tl_edge->synapses[k]->weight;
 		}
 	}

+	som_neuron_t *tr_edge = net->output_layer->neurons[0][ last_out_col ];
 	for ( i=1; i < out_rows - 1; ++i )
 	{
+		i_neuron = net->output_layer->neurons[i][ last_out_col ];
+		a		 = ( ((double) i - 1) / ((double) last_out_row ));
+		b		 = ( (double) ( out_rows - i ) / ((double) last_out_row ));
 		for ( k=0; k < in_size; ++k )
 		{
-			net->output_layer->neurons[i][ out_cols - 1 ]->synapses[k]->weight =
-				( ((double) i - 1) / ((double) out_rows - 1 )) * net->output_layer->neurons[ out_rows - 1 ][ out_cols - 1 ]->synapses[k]->weight +
-				( (double) ( out_rows - i ) / ((double) out_rows - 1 )) * net->output_layer->neurons[0][ out_cols - 1 ]->synapses[k]->weight;
+			i_neuron->synapses[k]->weight = a * br_edge->synapses[k]->weight + b * tr_edge->synapses[k]->weight;
 		}
 	}

 	/* Initialize the weights in the middle of the matrix */
+	som_neuron_t *ij_neuron;
+	double		  sqr_index = (double) last_out_row  * (double) last_out_col,
+				  prev_i,
+				  prev_j,
+				  prev_out_rows,
+				  prev_out_cols,
+				  c,
+				  d;
 	for ( i=1; i < out_rows - 1; ++i )
 	{
+		prev_i 		  = i - 1;
+		prev_out_rows = out_rows - i;
 		for ( j=1; j < out_cols - 1; ++j )
 		{
+			prev_j	  	  = j - 1;
+			ij_neuron 	  = net->output_layer->neurons[i][j];
+			prev_out_cols = out_cols - j;
+			a		  	  = ( prev_j * prev_i ) / sqr_index;
+			b	   	  	  = ( prev_j * prev_out_rows ) / sqr_index;
+			c		  	  = ( prev_out_cols * prev_i ) / sqr_index;
+			d		 	  = ( prev_out_cols * prev_out_rows ) / sqr_index;
 			for ( k=0; k < in_size; ++k )
 			{
-				net->output_layer->neurons[i][j]->synapses[k]->weight =
-					( (((double) j - 1)*((double) i - 1)) / (((double) out_rows - 1)*((double) out_cols - 1))) * net->output_layer->neurons[ out_rows - 1 ][ out_cols - 1 ]->synapses[k]->weight +
-					( (((double) j - 1)*(double) (out_rows - i)) / (((double) out_rows - 1)*((double) out_cols - 1))) * net->output_layer->neurons[0][ out_cols - 1 ]->synapses[k]->weight +
-					( ((double) (out_cols - j)*((double) i - 1)) / (((double) out_rows - 1)*((double) out_cols - 1))) * net->output_layer->neurons[ out_rows - 1 ][0]->synapses[k]->weight +
-					( ((double) (out_cols - j)*(double) (out_rows - i)) / (((double) out_rows - 1)*((double) out_cols - 1))) * net->output_layer->neurons[0][0]->synapses[k]->weight;
+				ij_neuron->synapses[k]->weight =
+					a *
+					br_edge->synapses[k]->weight +
+					b *
+					tr_edge->synapses[k]->weight +
+					c *
+					bl_edge->synapses[k]->weight +
+					d *
+					tl_edge->synapses[k]->weight;
 			}
 		}
 	}
@ -943,13 +941,11 @@ som_deserialize ( const char* fname )
 		return NULL;
 	}

-	if ( !( net = ( som_network_t* ) malloc ( sizeof ( som_network_t ))))
+	if ( !( net = ( som_network_t* ) calloc ( 1, sizeof ( som_network_t ))))
 	{
 		return NULL;
 	}

-	memset ( net, 0, sizeof ( som_network_t ));
-
 	fread ( &(net->serialization_time), sizeof ( time_t ), 1, fp );
 	fread ( &(net->T_learning_param ), sizeof ( double ), 1, fp );
 	fread ( &input_neurons, sizeof ( size_t ), 1, fp );
--- a/fsom.h
+++ b/fsom.h
@ -11,6 +11,7 @@
 *       Compiler:  gcc
 *
 *         Author:  BlackLight (http://0x00.ath.cx), <blacklight@autistici.org>
+ *    Contributor:  evilsocket (http://www.evilsocket.net), <evilsocket@gmail.com>
 *        Licence:  GNU GPL v.3
 *        Company:  DO WHAT YOU WANT CAUSE A PIRATE IS FREE, YOU ARE A PIRATE!
 *
@ -23,37 +24,51 @@
 #include	<stddef.h>
 #include	<time.h>

+#define TAYLOR_LAMBERT_ELEMENTS 	1001
+#define TAYLOR_LAMBERT_LAST_ELEMENT 1000
+
+#ifndef INLINE
+#	define INLINE __inline__ __attribute__((always_inline))
+#endif
+
 typedef struct  {
 	double                 output;
 	double                 input;

 	struct som_synapsis_s  **synapses;
 	size_t                 synapses_count;
-} som_neuron_t;
+} som_neuron_t
+__attribute__ ((aligned));;

 typedef struct som_synapsis_s  {
 	som_neuron_t    *neuron_in;
 	som_neuron_t    *neuron_out;
 	double          weight;
-} som_synapsis_t;
+} som_synapsis_t
+__attribute__ ((aligned));

 typedef struct  {
 	som_neuron_t    **neurons;
 	size_t          neurons_count;
-} som_input_layer_t;
+} som_input_layer_t
+__attribute__ ((aligned));

 typedef struct  {
 	som_neuron_t    ***neurons;
 	size_t          neurons_rows;
 	size_t          neurons_cols;
-} som_output_layer_t;
+} som_output_layer_t
+__attribute__ ((aligned));

 typedef struct  {
 	som_input_layer_t   *input_layer;
 	som_output_layer_t  *output_layer;
 	double              T_learning_param;
 	time_t              serialization_time;
-} som_network_t;
+	double				alphas[TAYLOR_LAMBERT_ELEMENTS];
+	double			    mus[TAYLOR_LAMBERT_ELEMENTS];
+} som_network_t
+__attribute__ ((aligned));

 void                 som_network_destroy ( som_network_t* );
 void                 som_set_inputs ( som_network_t*, double* );