/*----------------------------------------------------------------------*/ 
/*  Name           : mlp.c                                              */
/*  Version        : 1.0                                                */
/*  Creation       : 13/05/01                                           */
/*  Last update    : 05/06/24                                           */ 
/*  Subject        : Implementation of an MLP                           */
/*  Author         : Yann Guermeur                                      */
/*----------------------------------------------------------------------*/


#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
#include "biblio.h"
#include "algebre.h"
#include <sys/types.h>
#include <unistd.h>

/* Boolean constants used for the `learning` flag. */
#define true 1
#define false 0
/* Minimum / maximum of two values. Each argument is evaluated twice, so do
   not pass expressions with side effects. Not used in the code of this file. */
#define inf(a,b) ((a)<=(b)?(a):(b))
#define sup(a,b) ((a)>=(b)?(a):(b))

/* Size, in bytes, of every file-name buffer (terminating '\0' included). */
#define taille 81
/* Number of training iterations between two progress messages / saves of the
   weights (see the test on iter%pas in main). */
#define pas 100

/* Global variables */

/* Single stream handle reused for every file this program opens (parameter
   file, data set, weight files, output file). */
FILE *f1;

/* File names: file of parameters (argv[1]), data set, initial weights, final
   weights and computed outputs. `conf` is not referenced in this file. */
char fichier_fichcom[taille], fichier_data[taille], conf[taille],
 fichier_matrix_init[taille], fichier_matrix_fin[taille],
 fichier_output[taille];

/* State vector handed to nrand48() when a training example is drawn. */
unsigned short seed[3];

/* Receives the return value of system() and of the fscanf() calls. */
int status;

/* i, j, k        : loop counters shared by all the functions
   nb_data        : number of examples in the data set
   hls, ops       : sizes of the hidden and output layers (ops is set to Q)
   Q, Q_check     : number of categories (parameter file / data file)
   indice_exemple : index of the example currently presented to the network
   random_num     : last value returned by nrand48()
   learning       : true when the network is trained, false when it is used
   iter, nbiter   : current iteration and number of iterations
   exemple        : loop counter over the examples
   old_dim_input  : number of positions per example in the data file
   dim_input      : nb_symb * old_dim_input, size of the coded input
   X, y           : coded inputs (0/1) and categories of the examples
   choice[1..2]   : the two flag characters read from the parameter file
   nb_symb        : number of symbols a position can take
   mat_conf       : counts indexed by [category of the example][predicted one]
   All the arrays are indexed from 1. */
long i, j, k, nb_data, hls=4, ops = 3, Q = 3, indice_exemple = 1, random_num,
 learning=false, iter, nbiter, exemple, old_dim_input, dim_input, **X, *y, 
 choice[3], Q_check, nb_symb, **mat_conf;

/* dcdxl, dcdxk   : error terms of the output and hidden layers
   epsilon        : gradient step, epsilon_min: value under which it is no
                    longer decreased
   des            : desired output for the current example
   uweight, ubias : weights and biases of the top (output) layer
   fweight, fbias : weights and biases of the bottom (hidden) layer
   bound          : scale of the random initialization of the weights
   x_i            : input vector of the current example
   hidden, output : outputs of the hidden and output layers
   cost           : cross entropy accumulated by compute_cost() */
double *dcdxl, epsilon=0.01, *des, *dcdxk, **uweight, bound=0.0,
 *x_i, *fbias, *ubias, *hidden, epsilon_min=0.001, **fweight, *output, cost=0.0;

/* Functions included in this program */
/* All of them take no argument and communicate through the global variables. */

void caract_db();               /* reads the file of parameters */
void read_data();               /* reads and codes the data set */
void weights_init();            /* reads or draws the initial weights */
void compute_xi();              /* selects the current example, fills x_i */
void compute_des();             /* fills the desired output vector */
void compute_hidden_outputs();  /* forward pass, hidden layer */
void compute_top_outputs();     /* forward pass, output layer */
void compute_dcdxl();           /* error terms of the output layer */
void compute_dcdxk();           /* error terms of the hidden layer */
void modif_uweight();           /* update of the top layer weights */
void modif_fweight();           /* update of the bottom layer weights */
void modif_bias();              /* update of the biases */
void write_matrixes();          /* saves the weights and biases */
void compute_cost();            /* accumulates the cross entropy */
void eval_rec();                /* writes the outputs, updates mat_conf */
void network_use();             /* evaluation of the network on a data set */
void learning_proc();           /* one pass of nb_data gradient steps */

int main (int argc, char *argv[])

/* MLP - objective function: cross entropy.

   Usage: <program> file_of_parameters

   Reads the file of parameters and the data set, initializes the weights,
   then either
   - trains the network for nbiter iterations, saving the weights every `pas`
     iterations and once more after the last iteration when nbiter is not a
     multiple of `pas` (otherwise the last iterations would be lost), or
   - runs the network on the data set and displays the statistics.

   Returns EXIT_SUCCESS on completion, EXIT_FAILURE when the command line is
   missing the file name or when this name does not fit in its buffer. */

{

if(argc < 2)
  {
  fprintf(stderr, "Usage: %s file_of_parameters\n",
          (argc > 0) ? argv[0] : "mlp");
  return EXIT_FAILURE;
  }

/* The destination is a fixed-size buffer: refuse names that do not fit
   instead of overflowing it. */
if(strlen(argv[1]) >= sizeof fichier_fichcom)
  {
  fprintf(stderr, "Name of the file of parameters too long (%lu characters at most)\n",
          (unsigned long) (sizeof fichier_fichcom - 1));
  return EXIT_FAILURE;
  }

status = system("clear");
srand48(getpid());
strcpy(fichier_fichcom, argv[1]);

printf("\n      Name of the file of parameters: %s", fichier_fichcom);

caract_db();
read_data();

weights_init();

if(learning==true)
  {
  for(iter=1; iter<=nbiter; iter++)
    {
    learning_proc();
    if(iter%pas == 0)
      {
      printf("Iteration %5ld\n", iter);
      write_matrixes();
      }
    }

  /* Save the result of the iterations performed since the last checkpoint. */
  if((nbiter > 0) && (nbiter%pas != 0))
    write_matrixes();
  }
else
  {
  network_use();
  display_stats(fichier_data, nb_data, Q, mat_conf, cost);
  }

return EXIT_SUCCESS;

}

void caract_db()

/* Characterization of the problem.

   Reads the file of parameters (fichier_fichcom). It starts with two flag
   characters: the first one selects learning ('o') or use ('n') of the
   network, the second one is stored in choice[2]. Then come, in this order:
   nb_symb, Q, the names of the data set and of the files of initial and
   final weights, hls, epsilon, bound, nbiter and the name of the output file.

   Afterwards ops is set to Q, the confusion matrix is allocated and cleared,
   and the work vectors of both layers are allocated.

   A missing file, an unreadable or non-positive parameter, or an allocation
   failure stops the program with a failure status. */

{

char fmt_name[16];

if((f1=fopen(fichier_fichcom, "r"))==NULL)
  {
  fprintf(stderr, "\nFile of parameters: %s cannot be opened...\n",
          fichier_fichcom);
  exit(EXIT_FAILURE);
  }

choice[1]=getc(f1);

switch(choice[1])
  {
  case 'o': learning=true; break;
  case 'n': break;
  default: printf("\n\nCoding problem...\n\n"); exit(EXIT_FAILURE);
  }

choice[2]=getc(f1);

/* Bounded "%<n>s" conversion: all the file-name buffers are declared with
   the same size, so a single width protects every one of them. */
snprintf(fmt_name, sizeof fmt_name, "%%%lus",
         (unsigned long) (sizeof fichier_data - 1));

/* Each conversion is attempted only if the previous one succeeded, so
   `status` is 1 at the end only when the whole file was read. */
status = fscanf(f1, "%ld", &nb_symb);
if(status == 1) status = fscanf(f1, "%ld", &Q);
if(status == 1) status = fscanf(f1, fmt_name, fichier_data);
if(status == 1) status = fscanf(f1, fmt_name, fichier_matrix_init);
if(status == 1) status = fscanf(f1, fmt_name, fichier_matrix_fin);
if(status == 1) status = fscanf(f1, "%ld", &hls);
if(status == 1) status = fscanf(f1, "%lf", &epsilon);
if(status == 1) status = fscanf(f1, "%lf", &bound);
if(status == 1) status = fscanf(f1, "%ld", &nbiter);
if(status == 1) status = fscanf(f1, fmt_name, fichier_output);

if(status != 1)
  {
  fprintf(stderr, "\nFile of parameters: %s is incomplete or malformed...\n",
          fichier_fichcom);
  exit(EXIT_FAILURE);
  }

/* These values are used as array sizes below and in the other functions. */
if((nb_symb < 1) || (Q < 1) || (hls < 1))
  {
  fprintf(stderr, "\nFile of parameters: %s, the number of symbols, of categories"
          " and of hidden units must be positive...\n", fichier_fichcom);
  exit(EXIT_FAILURE);
  }

printf("\n                Name of the data set: %s", fichier_data);
if(learning == true)
  {
  epsilon_min = epsilon * 0.1;
  printf("\n Name of the file of initial weights: %s", fichier_matrix_init);
  printf("\n   Name of the file of final weights: %s", fichier_matrix_fin);
  printf("\n  Minimal value of the gradient step: %lf\n", epsilon_min);
  }
else
  {
  printf("\n         Name of the file of weights: %s", fichier_matrix_init);
  printf("\nName of the file of computed outputs: %s", fichier_output);
  }

fclose(f1);

ops=Q;

mat_conf = matrix_l(Q, Q);

for(i=1; i<=Q; i++)
  for(j=1; j<=Q; j++)
    mat_conf[i][j] = 0;

/* One extra cell per vector: the arrays are indexed from 1. */
hidden = calloc(hls+1, sizeof *hidden);
dcdxk = calloc(hls+1, sizeof *dcdxk);
output = calloc(ops+1, sizeof *output);
des = calloc(ops+1, sizeof *des);
dcdxl = calloc(ops+1, sizeof *dcdxl);

if((hidden == NULL) || (dcdxk == NULL) || (output == NULL) ||
   (des == NULL) || (dcdxl == NULL))
  {
  fprintf(stderr, "\nNot enough memory for the vectors of the network...\n");
  exit(EXIT_FAILURE);
  }

}

void read_data()

/* Reading the data set.

   Format of the file (fichier_data): nb_data, old_dim_input and the number
   of categories, then for each example old_dim_input symbol codes followed
   by the category of the example.

   A code 0 is mapped to nb_symb; every code must then lie in 1..nb_symb and
   is stored in X as a block of nb_symb cells holding a single 1. The
   categories must cover Q consecutive indices starting at 0 or at 1; when
   they start at 0 they are shifted so that y[] always ranges over 1..Q.

   A missing, truncated or inconsistent file, or an allocation failure,
   stops the program with a failure status. */

{

long y_min = Q, y_max = 0, residue_coding;

if((f1=fopen(fichier_data, "r"))==NULL)
  {
  printf("\nData file: %s not found...\n", fichier_data);
  exit(EXIT_FAILURE);
  }

status = fscanf(f1, "%ld", &nb_data);
if(status == 1) status = fscanf(f1, "%ld", &old_dim_input);
if(status == 1) status = fscanf(f1, "%ld", &Q_check);

if(status != 1)
  {
  fprintf(stderr, "\nData file: %s, unreadable header...\n", fichier_data);
  exit(EXIT_FAILURE);
  }

if(Q_check != Q)
  {
  printf("\n\nIncoherence in the numbers of categories...");
  exit(EXIT_FAILURE);
  }

/* These values size the arrays allocated just below. */
if((nb_data < 1) || (old_dim_input < 1) || (nb_symb < 1))
  {
  fprintf(stderr, "\nData file: %s, the number of examples, of positions and"
          " of symbols must be positive...\n", fichier_data);
  exit(EXIT_FAILURE);
  }

dim_input = nb_symb * old_dim_input;

X = matrix_l(nb_data+1, dim_input+1);
x_i = calloc(dim_input+1, sizeof *x_i);
y = calloc(nb_data+1, sizeof *y);

if((X == NULL) || (x_i == NULL) || (y == NULL))
  {
  fprintf(stderr, "\nNot enough memory for the data set...\n");
  exit(EXIT_FAILURE);
  }

for(i=1; i<=nb_data; i++)
  {
  for(j=1; j<=old_dim_input; j++)
    {
    status = fscanf(f1, "%ld", &residue_coding);
    if(status != 1)
      {
      fprintf(stderr, "\nData file: %s, window %ld, position %ld: missing or"
              " invalid value...\n", fichier_data, i, j);
      exit(EXIT_FAILURE);
      }
    if(residue_coding == 0)
      residue_coding = nb_symb;
    if((residue_coding < 1) || (residue_coding > nb_symb))
      {
      printf("\nWindow %ld, position %ld, Unknown residue %ld\n\n", 
      i, j, residue_coding);
      exit(EXIT_FAILURE);
      }
    /* One block of nb_symb cells per position, with a single 1 in it. */
    for(k=1; k<=nb_symb; k++)
      X[i][(j-1)*nb_symb+k] = (k == residue_coding) ? 1 : 0;
    }

  status = fscanf(f1, "%ld", &y[i]);
  if(status != 1)
    {
    fprintf(stderr, "\nData file: %s, window %ld: missing or invalid"
            " category...\n", fichier_data, i);
    exit(EXIT_FAILURE);
    }

  if(y[i] < y_min)
     y_min = y[i];

  if(y[i] > y_max)
     y_max = y[i];
  }

fclose(f1);

if((y_max - y_min != (Q-1)) || (y_min < 0) || (y_max > Q))
  {
  printf("\nIndices of categories ranging from %ld to %ld\n", y_min, y_max);
  exit(EXIT_FAILURE);
  }

/* Categories numbered from 0 in the file: renumber them from 1. */
if(y_min == 0)
  for(i=1; i<=nb_data; i++)
    y[i]++;

}

static void mlp_read_weight(double *dest)

/* Reads one value from the file of weights opened on f1 into *dest; stops
   the program when the file is truncated or holds something else than a
   number, instead of silently keeping a zero weight. */

{

status = fscanf(f1, "%lf", dest);

if(status != 1)
  {
  fprintf(stderr, "\nFile of the weights, %s: missing or invalid value...\n",
          fichier_matrix_init);
  exit(EXIT_FAILURE);
  }

}

void weights_init()

/* Initialization of the weights.

   Allocates the two weight matrices and the two bias vectors, then
   - when choice[2] is 'o', reads them from fichier_matrix_init in the order
     fweight (hls x dim_input), uweight (ops x hls), fbias, ubias, which is
     the order used by write_matrixes();
   - otherwise draws the weights as bound*(drand48()-0.5) and the biases as a
     tenth of such a value.

   A missing or incomplete file of weights, or an allocation failure, stops
   the program with a failure status. */

{

fweight = matrix(hls+1, dim_input+1);
uweight  = matrix(ops+1, hls+1);

fbias = calloc(hls+1, sizeof *fbias);
ubias = calloc(ops+1, sizeof *ubias);

if((fbias == NULL) || (ubias == NULL))
  {
  fprintf(stderr, "\nNot enough memory for the biases...\n");
  exit(EXIT_FAILURE);
  }

if(choice[2] == 'o')
  {
  if((f1=fopen(fichier_matrix_init, "r"))==NULL)
    {
    printf("\nFile of the weights, %s: cannot be found...\n", 
    fichier_matrix_init);
    exit(EXIT_FAILURE);
    }
  else
    printf("\n\nReading the values of the weights...\n");

  for(i=1; i<=hls; i++)
    for(j=1; j<=dim_input; j++)
      mlp_read_weight(&fweight[i][j]);

  for(i=1; i<=ops; i++)
    for(j=1; j<=hls; j++)
      mlp_read_weight(&uweight[i][j]);

  for(i=1; i<=hls; i++)
    mlp_read_weight(&fbias[i]);

  for(i=1; i<=ops; i++)
    mlp_read_weight(&ubias[i]);

  fclose(f1);
  }
else
  {

  printf("\nRandom initialization of the weights...\n\n");

  for(i=1; i<=hls; i++)
    for(j=1; j<=dim_input; j++)
      fweight[i][j] = bound*(drand48()-0.5);

  for(i=1; i<=ops; i++)
    for(j=1; j<=hls; j++)
      uweight[i][j] = bound*(drand48()-0.5);

  for(i=1; i<=hls; i++)
    fbias[i] = (bound*(drand48()-0.5))/10.0;

  for(i=1; i<=ops; i++)
    ubias[i] = (bound*(drand48()-0.5))/10.0;
  }

}

void compute_xi()

/* Computation of the input vector.

   In learning mode the example is drawn at random with nrand48(); otherwise
   the example of index `exemple` is taken. The coded input of the selected
   example is then copied from X into x_i.

   nrand48() works on the state vector `seed`, which srand48() does not
   touch. If that state is still all zeros on the first draw, it is
   initialized from the process id and the clock, so that the order of
   presentation of the examples differs from one run to the next.

   An index outside 1..nb_data (which includes the case of an empty data
   set) stops the program with a failure status. */

{

static int seed_checked = 0;
time_t now;

if(learning == true)
  {
  if(!seed_checked)
    {
    if((seed[0] == 0) && (seed[1] == 0) && (seed[2] == 0))
      {
      now = time(NULL);
      seed[0] = (unsigned short) getpid();
      seed[1] = (unsigned short) now;
      seed[2] = (unsigned short) ((unsigned long) now >> 16);
      }
    seed_checked = 1;
    }

  random_num = nrand48(seed);
  /* No modulo by zero: an empty data set yields the invalid index 0,
     rejected just below. */
  indice_exemple = (nb_data > 0) ? (random_num % nb_data) + 1 : 0;
  }
else
  indice_exemple = exemple;

if((indice_exemple < 1) || (indice_exemple > nb_data))
  {
  printf("\n\nWrong index of example...\n\n");
  exit(EXIT_FAILURE);
  }

for(k=1; k<=dim_input; k++)
  x_i[k] = X[indice_exemple][k];

}

void compute_des()

/* Builds the desired output for the current example: des[k] is 1.0 for the
   category of example indice_exemple and 0.0 for the Q-1 other ones. */

{

k = 1;
while(k <= Q)
  {
  if(y[indice_exemple] == k)
    des[k] = 1.0;
  else
    des[k] = 0.0;
  k++;
  }

}

void compute_hidden_outputs()

/* Forward pass through the hidden layer: the result of mult_mat_vect() on
   fweight and x_i, to which add_vects() adds fbias, goes through tanh, one
   hidden unit at a time. The hls results are left in hidden[1..hls]. */

{

/* Activations (helpers defined outside this file) */
mult_mat_vect(fweight, x_i, hidden, hls, dim_input);
add_vects(hidden, fbias, hls);

/* Transfer function, applied in place */
i = 1;
while(i <= hls)
  {
  hidden[i] = tanh(hidden[i]);
  i++;
  }

}

void compute_top_outputs()

/* Computation of the output of the network.

   The activations of the output units (result of mult_mat_vect() on uweight
   and hidden, to which add_vects() adds ubias) are turned into values that
   are positive and sum to one:
       output[i] = exp(a[i]) / sum_l exp(a[l]).

   The largest activation is subtracted from all of them before calling
   exp(). This leaves the ratios unchanged but keeps every exponent <= 0, so
   exp() cannot overflow (which would give inf/inf = NaN), and the sum is at
   least 1, so the division is always safe. */

{

double sum = 0.0, top;

mult_mat_vect(uweight, hidden, output, ops, hls);
add_vects(output, ubias, ops);

/* Nothing to normalize without output units. */
if(ops < 1)
  return;

top = output[1];
for(i=2; i<=ops; i++)
  if(output[i] > top)
    top = output[i];

for(i=1; i<=ops; i++)
  {
  output[i] = exp(output[i] - top);
  sum += output[i];
  }

for(i=1; i<=ops; i++)
  output[i] /= sum;

}

void compute_dcdxl()

/* Computation of the partial derivatives dc/dxl */
/* Fills dcdxl[1..ops], the error terms of the output units, by calling
   sub_vects() on output and des. NOTE(review): sub_vects() is defined
   outside this file; presumably it stores output - des in dcdxl - confirm
   the order of the operands against its definition. */

{

sub_vects(output, des, dcdxl, ops);

}

void compute_dcdxk()

/* Error terms of the hidden units:
       dcdxk[j] = tanh_prim(hidden[j]) * sum_i dcdxl[i] * uweight[i][j]
   where i runs over the ops output units. tanh_prim() is defined outside
   this file (presumably the derivative of the transfer function). */

{

/* Clear the accumulators */
i = 1;
while(i <= hls)
  {
  dcdxk[i] = 0.0;
  i++;
  }

/* Propagate the error terms of the output layer through the top weights */
i = 1;
while(i <= ops)
  {
  j = 1;
  while(j <= hls)
    {
    dcdxk[j] = dcdxk[j] + dcdxl[i] * uweight[i][j];
    j++;
    }
  i++;
  }

/* Scale by the slope of the transfer function */
i = 1;
while(i <= hls)
  {
  dcdxk[i] = dcdxk[i] * tanh_prim(hidden[i]);
  i++;
  }

}

void modif_uweight()

/* Gradient step on the top layer: the weight linking hidden unit j to
   output unit i is decreased by epsilon * dcdxl[i] * hidden[j]. */

{

double *row;

i = 1;
while(i <= ops)
  {
  row = uweight[i];
  j = 1;
  while(j <= hls)
    {
    row[j] = row[j] - epsilon * dcdxl[i] * hidden[j];
    j++;
    }
  i++;
  }

}
 
void modif_fweight()

/* Gradient step on the bottom layer: the weight linking input j to hidden
   unit i is decreased by epsilon * dcdxk[i] * x_i[j]. */

{

double *row;

i = 1;
while(i <= hls)
  {
  row = fweight[i];
  j = 1;
  while(j <= dim_input)
    {
    row[j] = row[j] - epsilon * dcdxk[i] * x_i[j];
    j++;
    }
  i++;
  }

}

void modif_bias()

/* Gradient step on the biases: each bias is decreased by epsilon times the
   error term of its unit (dcdxk for the hidden units, dcdxl for the output
   units). */

{

/* Hidden layer */
i = 1;
while(i <= hls)
  {
  fbias[i] = fbias[i] - epsilon * dcdxk[i];
  i++;
  }

/* Output layer */
i = 1;
while(i <= ops)
  {
  ubias[i] = ubias[i] - epsilon * dcdxl[i];
  i++;
  }

}

void compute_cost()

/* Computation of the objective function (cross entropy).

   Adds -log(output[c]) to `cost`, c being the category of example
   `exemple`. When that output is not strictly positive (which is also the
   case when it is not a number), the logarithm is not finite: the program
   reports it and stops with a failure status, so that a calling script does
   not mistake the aborted run for a successful one. */

{

if(!(output[y[exemple]] > 0.0))
  {
  printf("\nExample: %ld, objective function is infinite...\n\n",
         exemple);
  exit(EXIT_FAILURE);
  }

cost -= log(output[y[exemple]]);

}

void eval_rec()

/* Classification of a pattern.

   The predicted category is the index of the largest output (the first one
   in case of a tie). The Q outputs are written on one line of the file
   opened on f1, and the cell [category of the example][predicted category]
   of mat_conf is incremented.

   The search starts from category 1 rather than from an "index 0 / maximum
   0.0" sentinel: with the sentinel, outputs that are all non-positive or
   not numbers left the prediction at 0, which is not a valid index of
   mat_conf. A running maximum that is not a number is replaced by the next
   output, so that a valid output wins whenever there is one. */

{

long class_pred = 1;

double maximum;

if(Q < 1)
  return;

maximum = output[1];

for(k=2; k<=Q; k++)
  if((output[k] > maximum) || isnan(maximum))
    {
    class_pred = k;
    maximum = output[k];
    }

for(k=1; k<=Q; k++)
  fprintf(f1, "%lf%c", output[k], (k==Q) ? '\n':' ');

mat_conf[y[exemple]][class_pred]++;

}

void learning_proc()

/* One training iteration: the gradient step is first multiplied by 0.998
   (as long as it has not fallen under epsilon_min), then nb_data stochastic
   gradient steps are performed, each one on an example chosen by
   compute_xi(): forward pass, computation of the error terms, update of the
   weights and of the biases. */

{

if(epsilon >= epsilon_min)
  epsilon = epsilon * 0.998;

exemple = 1;
while(exemple <= nb_data)
  {
  /* Selection of the example and of its target */
  compute_xi();
  compute_des();

  /* Forward pass */
  compute_hidden_outputs();
  compute_top_outputs();

  /* Error terms */
  compute_dcdxl();
  compute_dcdxk();

  /* Update of the parameters */
  modif_uweight();
  modif_fweight();
  modif_bias();

  exemple++;
  }

}

void network_use()

/* Assessment of the network on a test set.

   For each example, in the order of the data set: forward pass,
   accumulation of the cross entropy in `cost`, then eval_rec() writes the
   outputs to fichier_output and updates the confusion matrix.

   The program stops with a failure status when the output file cannot be
   created. A write error is only reported: the cost and the confusion
   matrix, which are held in memory, remain usable. */

{

if((f1=fopen(fichier_output, "w"))==NULL)
  {
  fprintf(stderr, "\nFile of computed outputs: %s cannot be created...\n",
          fichier_output);
  exit(EXIT_FAILURE);
  }

for(exemple=1; exemple<=nb_data; exemple++)
  {
  compute_xi();
  compute_des();
  compute_hidden_outputs();
  compute_top_outputs();
  compute_cost();
  eval_rec();
  }

/* Buffered data may only reach the disk in fclose(): check both. */
if(ferror(f1) != 0)
  fprintf(stderr, "\nFile of computed outputs: %s, write error...\n",
          fichier_output);

if(fclose(f1) != 0)
  fprintf(stderr, "\nFile of computed outputs: %s cannot be closed properly...\n",
          fichier_output);

}

void write_matrixes()

/* Save of network weights.

   Writes to fichier_matrix_fin, one value per line: fweight (hls x
   dim_input), uweight (ops x hls), fbias and ubias. This is the order in
   which weights_init() reads a file of weights.

   The program stops with a failure status when the file cannot be created.
   A write error is only reported, so that the training in progress is not
   lost and the next save can still succeed. */

{

if((f1=fopen(fichier_matrix_fin,"w"))==NULL)
  {
  fprintf(stderr, "\nFile of final weights: %s cannot be created...\n",
          fichier_matrix_fin);
  exit(EXIT_FAILURE);
  }

for(i=1; i<=hls; i++)
  for(j=1; j<=dim_input; j++)
    fprintf(f1,"%lf\n", fweight[i][j]);

for(i=1; i<=ops; i++)
  for(j=1; j<=hls; j++)
    fprintf(f1,"%lf\n", uweight[i][j]);

for(i=1; i<=hls; i++)
  fprintf(f1,"%lf\n", fbias[i]);

for(i=1; i<=ops; i++)
  fprintf(f1,"%lf\n", ubias[i]);

/* Buffered data may only reach the disk in fclose(): check both. */
if(ferror(f1) != 0)
  fprintf(stderr, "\nFile of final weights: %s, write error...\n",
          fichier_matrix_fin);

if(fclose(f1) != 0)
  fprintf(stderr, "\nFile of final weights: %s cannot be closed properly...\n",
          fichier_matrix_fin);

}
