/*----------------------------------------------------------------------*/ 
/*  Name           : plr.c                                              */
/*  Version        : 1.0                                                */
/*  Creation       : 05/13/01                                           */
/*  Last update    : 10/15/25                                           */ 
/*  Subject        : Implementation of a Polytomous Logistic Regression */
/*  Author         : Yann Guermeur                                      */
/*----------------------------------------------------------------------*/


#include <stdio.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include <stdlib.h>
#include <time.h>
#include "biblio.h"
#include "algebre.h"
#include <sys/types.h>
#include <unistd.h>

/* Boolean constants, defined as plain integer macros */
#define true 1
#define false 0
/* Smaller / larger of two values.  These are function-like macros: an
   argument may be evaluated twice, so avoid arguments with side effects */
#define inf(a,b) ((a)<=(b)?(a):(b))
#define sup(a,b) ((a)>=(b)?(a):(b))

/* Size, in characters, of the buffers holding the file names */
#define taille 81
/* Number of iterations between two progress messages and weight saves */
#define pas 100
/* A predictor whose standard deviation does not exceed this value is
   centered but not rescaled by standardize_data() */
#define negligible 1e-8

/* Global variables */

/* Stream handle shared by all the routines of this file: each file is
   opened, processed and closed through f1, one file at a time */
FILE *f1;

/* File names: file of parameters (copied from argv[1]), data set, initial
   weights, final weights, computed outputs.  All but the first are read
   from the file of parameters by caract_db() */
char fichier_fichcom[taille], fichier_data[taille], fichier_matrix_init[taille],
 fichier_matrix_fin[taille], fichier_output[taille];

/* State array handed to nrand48() in compute_xi().
   NOTE(review): no assignment to seed is visible in this file, and the
   srand48() call in main() does not act on this array -- confirm that
   starting from the zero-initialized state is intended */
unsigned short seed[3];

/* Receives the return value of system() and fscanf(); it is never read
   back in this file */
int status;

/* i, j, k        : loop counters shared by all the routines
   nb_data        : number of examples in the data set
   ops            : number of outputs, set to Q by caract_db()
   Q              : number of categories
   indice_exemple : index of the example currently processed
   random_num     : last value returned by nrand48()
   learning       : true for training, false for evaluation
   iter, nbiter   : current iteration and number of iterations
   exemple        : loop index over the examples
   dim_input      : number of predictors
   y              : category of each example
   choice         : mode characters read from the file of parameters
                    (only entries 1 and 2 are used)
   Q_check        : number of categories announced by the data file
   mat_conf       : confusion matrix, indexed [category in the data]
                    [predicted category]
   Vectors and matrices are indexed from 1 throughout this file */
long i, j, k, nb_data, ops = 3, Q = 3, indice_exemple = 1, random_num,
 learning=false, iter, nbiter, exemple, dim_input, *y, choice[3], Q_check,
 **mat_conf;

/* dcdxl       : vector filled by sub_vects() in compute_dcdxl() and used
                 as the per-output factor of the weight and bias updates
   epsilon     : gradient step, multiplied by 0.998 at each call of
                 learning_proc() as long as it is >= epsilon_min
   des         : target vector (1.0 for the category of the example,
                 0.0 elsewhere)
   weight      : matrix of weights, ops rows by dim_input columns
   bound       : width of the interval of the random initial weights
   mean, variance, st_dev : statistics of each predictor over the data set
   x_i         : copy of the predictors of the current example
   bias        : bias of each output
   epsilon_min : set to 0.1 * epsilon by caract_db() in training mode
   output      : outputs of the model for the current example
   X           : data matrix, nb_data rows by dim_input columns
   cost        : sum accumulated by compute_cost() */
double *dcdxl, epsilon=0.01, *des, **weight, bound=0.0,
 *mean, *variance, *st_dev,
 *x_i, *bias, epsilon_min=0.001, *output, **X, cost=0.0;

/* Functions included in this program.
   All of them take no argument and communicate through the global
   variables declared above. */

void caract_db();        /* read the file of parameters, allocate outputs */
void read_data();        /* load the data set and check the categories */
void standardize_data(); /* center and scale each predictor */
void weights_init();     /* read or draw the initial weights and biases */
void compute_xi();       /* select the current example, copy it to x_i */
void compute_des();      /* build the target vector of the example */
void compute_outputs();  /* compute the normalized outputs of the model */
void compute_dcdxl();    /* fill dcdxl from output and des */
void modif_weight();     /* gradient step on the weights */
void modif_bias();       /* gradient step on the biases */
void write_matrixes();   /* save the weights and biases to a file */
void compute_cost();     /* accumulate the cross entropy in cost */
void eval_rec();         /* write the outputs, update the confusion matrix */
void network_use();      /* evaluate the model on the whole data set */
void learning_proc();    /* one pass of nb_data gradient steps */

int main (int argc, char *argv[])

/* PLR - objective function: cross entropy

   argv[1] is the name of the file of parameters read by caract_db().
   In training mode the model is updated for nbiter iterations; the
   weights are saved every `pas` iterations and once more after the last
   iteration when nbiter is not a multiple of `pas`, so that the final
   state is never lost.  In evaluation mode the model is applied to the
   data set and the statistics are displayed.

   Returns 0 on success and EXIT_FAILURE when the command line cannot be
   used (missing or too long file name). */

{

/* The file of parameters is mandatory */
if(argc < 2)
  {
  printf("\nUsage: %s file_of_parameters\n\n", (argc > 0) ? argv[0] : "plr");
  return EXIT_FAILURE;
  }

/* Its name must fit in the fixed-size buffer, terminator included */
if(strlen(argv[1]) >= sizeof fichier_fichcom)
  {
  printf("\nName of the file of parameters too long (%d characters at most)\n\n",
         (int) (sizeof fichier_fichcom - 1));
  return EXIT_FAILURE;
  }

status = system("clear");
srand48(getpid());
strcpy(fichier_fichcom, argv[1]);

printf("\n      Name of the file of parameters: %s", fichier_fichcom);

caract_db();
read_data();
standardize_data();

weights_init();

if(learning==true)
  {
  for(iter=1; iter<=nbiter; iter++)
    {
    learning_proc(); 
    if(iter%pas == 0)
      {
      printf("Iteration %5ld\n", iter);
      write_matrixes();
      }
    }

  /* The periodic save above misses the iterations performed after the
     last multiple of `pas` */
  if(nbiter > 0 && nbiter%pas != 0)
    write_matrixes();
  }
else
  {
  network_use();
  display_stats(fichier_data, nb_data, Q, mat_conf, cost);
  }

return 0;

}

void caract_db()

/* Characterization of the problem */

{

if((f1=fopen(fichier_fichcom, "r"))==NULL)
  exit(0);

choice[1]=getc(f1);

switch(choice[1])
  {
  case 'o': learning=true; break;
  case 'n': break;
  default: printf("\n\nCoding problem...\n\n"); exit(0);
  }

choice[2]=getc(f1);

status = fscanf(f1, "%ld", &Q);
status = fscanf(f1, "%s", fichier_data);
status = fscanf(f1, "%s", fichier_matrix_init);
status = fscanf(f1, "%s", fichier_matrix_fin);
status = fscanf(f1, "%lf", &epsilon);
status = fscanf(f1, "%lf", &bound);
status = fscanf(f1, "%ld", &nbiter);
status = fscanf(f1, "%s", fichier_output);

printf("\n                Name of the data set: %s", fichier_data);
if(learning == true)
  {
  epsilon_min = epsilon * 0.1;
  printf("\n Name of the file of initial weights: %s", fichier_matrix_init);
  printf("\n   Name of the file of final weights: %s", fichier_matrix_fin);
  printf("\n  Minimal value of the gradient step: %lf\n", epsilon_min);
  }
else
  {
  printf("\n         Name of the file of weights: %s", fichier_matrix_init);
  printf("\nName of the file of computed outputs: %s", fichier_output);
  }

fclose(f1);

ops=Q;

mat_conf = matrix_l(Q, Q);

for(i=1; i<=Q; i++)
  for(j=1; j<=Q; j++)
    mat_conf[i][j] = 0;

output = (double *) calloc(ops+1, sizeof(double));
des = (double *) calloc(ops+1, sizeof(double));
dcdxl = (double *) calloc(ops+1, sizeof(double));

}

void read_data()

/* Reading the data set

   Layout of the data file: number of examples, number of predictors and
   number of categories, then for each example its predictors followed by
   its category.

   The categories must be numbered from 1 to Q.  In training mode both
   extreme categories 1 and Q must occur in the data; in evaluation mode
   the categories only have to lie in [1, Q].  The check is performed in
   both modes: the category is later used as an index in output[] and
   mat_conf[][].

   The program stops with a message when the file is missing, truncated,
   malformed, or when a size is not strictly positive (the number of
   examples is used as a divisor by the other routines). */

{

long y_min = Q, y_max = 0;

if((f1=fopen(fichier_data, "r"))==NULL)
  {
  printf("\nData file: %s not found...\n", fichier_data);
  exit(EXIT_FAILURE);
  }

if(fscanf(f1, "%ld", &nb_data) != 1
   || fscanf(f1, "%ld", &dim_input) != 1
   || fscanf(f1, "%ld", &Q_check) != 1)
  {
  printf("\nData file: %s, the header cannot be read...\n", fichier_data);
  exit(EXIT_FAILURE);
  }

if(Q_check != Q)
  {
  printf("\n\nIncoherence in the numbers of categories...");
  exit(EXIT_FAILURE);
  }

if(nb_data < 1 || dim_input < 1)
  {
  printf("\nData file: %s, wrong sizes (%ld examples, %ld predictors)...\n",
         fichier_data, nb_data, dim_input);
  exit(EXIT_FAILURE);
  }

X = matrix(nb_data, dim_input);
x_i = (double *) calloc(dim_input+1, sizeof(double));
y = (long *) calloc(nb_data+1, sizeof(long));

mean = (double *) calloc(dim_input+1, sizeof(double));
variance = (double *) calloc(dim_input+1, sizeof(double));
st_dev = (double *) calloc(dim_input+1, sizeof(double));

if(x_i == NULL || y == NULL || mean == NULL || variance == NULL
   || st_dev == NULL)
  {
  printf("\nNot enough memory for the data set...\n");
  exit(EXIT_FAILURE);
  }

for(i=1; i<=nb_data; i++)
  {
  for(j=1; j<=dim_input; j++)
    if(fscanf(f1, "%lf", &X[i][j]) != 1)
      {
      printf("\nData file: %s, example %ld: predictor %ld cannot be read...\n",
             fichier_data, i, j);
      exit(EXIT_FAILURE);
      }

  if(fscanf(f1, "%ld", &y[i]) != 1)
    {
    printf("\nData file: %s, example %ld: category cannot be read...\n",
           fichier_data, i);
    exit(EXIT_FAILURE);
    }

  if(y[i] < y_min)
     y_min = y[i];

  if(y[i] > y_max)
     y_max = y[i];
  }

fclose(f1);

/* The braces matter: without them the else would attach to the inner
   if and the evaluation mode would not be checked at all */
if(learning==true)
  {
  if(y_min != 1 || y_max != Q)
    {
    printf("\nWrong numbering of the categories\n");
    exit(EXIT_FAILURE);
    }
  }
else
  {
  if(y_min < 1 || y_max > Q)
    {
    printf("\nWrong numbering of the categories\n");
    exit(EXIT_FAILURE);
    }
  }

}

void standardize_data()

/* Standardization of the data set, predictor by predictor: each column
   of X is centered on its mean and, when its standard deviation exceeds
   `negligible`, divided by that standard deviation.  The statistics are
   left in mean[], variance[] and st_dev[]; the variance is the sum of
   squared deviations divided by nb_data.  The shared counters i and j
   are used as loop indices. */

{

double *row, gap, n_obs = (double) nb_data;

/* Reset of the accumulators */
j = 1;
while(j <= dim_input)
  {
  st_dev[j] = 0.0;
  variance[j] = 0.0;
  mean[j] = 0.0;
  j++;
  }

/* Means: sum over the examples, then division by their number */
for(i=1; i<=nb_data; i++)
  {
  row = X[i];
  for(j=1; j<=dim_input; j++)
    mean[j] = mean[j] + row[j];
  }

j = 1;
while(j <= dim_input)
  {
  mean[j] = mean[j] / n_obs;
  j++;
  }

/* Variances and standard deviations */
for(i=1; i<=nb_data; i++)
  {
  row = X[i];
  for(j=1; j<=dim_input; j++)
    {
    gap = row[j] - mean[j];
    variance[j] = variance[j] + gap * gap;
    }
  }

j = 1;
while(j <= dim_input)
  {
  variance[j] = variance[j] / n_obs;
  st_dev[j] = sqrt(variance[j]);
  j++;
  }

/* Centering, then scaling of the predictors that actually vary */
for(i=1; i<=nb_data; i++)
  {
  row = X[i];
  for(j=1; j<=dim_input; j++)
    {
    row[j] = row[j] - mean[j];
    if(negligible < st_dev[j])
      row[j] = row[j] / st_dev[j];
    }
  }

}

void weights_init()

/* Initialization of the weights

   Allocates the matrix of weights (ops x dim_input) and the vector of
   biases.  When choice[2] is 'o' the values are read from the file of
   initial weights: all the weights row by row, then the biases.
   Otherwise the weights are drawn uniformly in [-bound/2, bound/2), the
   biases in an interval ten times smaller, and the result is saved with
   write_matrixes().

   A file of weights that is missing, too short or malformed stops the
   program: a partially read model would otherwise be used silently. */

{

weight = matrix(ops, dim_input);
bias = (double *) calloc(ops+1, sizeof(double));

if(bias == NULL)
  {
  printf("\nNot enough memory for the biases...\n");
  exit(EXIT_FAILURE);
  }

if(choice[2] == 'o')
  {
  if((f1=fopen(fichier_matrix_init, "r"))==NULL)
    {
    printf("\nFile of the weights, %s: cannot be found...\n", 
    fichier_matrix_init);
    exit(EXIT_FAILURE);
    }
  else
    printf("\n\nReading the values of the weights...\n");

  for(i=1; i<=ops; i++)
    for(j=1; j<=dim_input; j++)
      if(fscanf(f1, "%lf", &weight[i][j]) != 1)
        {
        printf("\nFile of the weights, %s: weight (%ld, %ld) cannot be read...\n",
               fichier_matrix_init, i, j);
        exit(EXIT_FAILURE);
        }

  for(i=1; i<=ops; i++)
    if(fscanf(f1, "%lf", &bias[i]) != 1)
      {
      printf("\nFile of the weights, %s: bias %ld cannot be read...\n",
             fichier_matrix_init, i);
      exit(EXIT_FAILURE);
      }

  fclose(f1);
  } 
else
  {
  printf("\nRandom initialization of the weights...\n\n");

  for(i=1; i<=ops; i++)
    for(j=1; j<=dim_input; j++)
      weight[i][j] = bound*(drand48()-0.5);

  for(i=1; i<=ops; i++)
    bias[i] = (bound*(drand48()-0.5))/10.0;

  write_matrixes();
  }

}

void compute_xi()

/* Selection of the current example and copy of its predictors in x_i.
   In training mode the index is drawn with nrand48(seed) in
   [1, nb_data]; otherwise it is the loop index `exemple`.  The program
   stops when the index falls outside [1, nb_data].  The shared counter
   k is used as loop index.
   NOTE(review): no assignment to seed is visible in this file and
   srand48() does not act on the state of nrand48() -- confirm that the
   resulting fixed sequence of draws is intended. */

{

double *source;

if(learning != true)
  indice_exemple = exemple;
else
  {
  random_num = nrand48(seed);
  indice_exemple = 1 + (random_num % nb_data);
  }

if(!((indice_exemple >= 1) && (indice_exemple <= nb_data)))
  {
  printf("\n\nWrong index of example...\n\n");
  exit(0);
  }

/* Copy of the selected row of the data matrix */
source = X[indice_exemple];

k = 1;
while(k <= dim_input)
  {
  x_i[k] = source[k];
  k++;
  }

}

void compute_des()

/* Target vector of the current example: des[k] is 1.0 for the category
   y[indice_exemple] and 0.0 for the Q-1 others.  The shared counter k
   is used as loop index. */

{

k = 1;
while(k <= Q)
  {
  if(k == y[indice_exemple])
    des[k] = 1.0;
  else
    des[k] = 0.0;

  k++;
  }

}

void compute_outputs()

/* Computation of the output of the network

   The scores are produced by mult_mat_vect() and add_vects() (presumably
   output = weight * x_i, then output += bias -- see algebre.h), then
   exponentiated and divided by their sum so that the outputs sum to 1.

   The largest score is subtracted from all of them before the
   exponentiation.  This leaves the normalized outputs unchanged
   mathematically, but every exponent is then <= 0: exp() cannot
   overflow, and the sum is at least 1 (the term of the largest score),
   so it can neither overflow nor vanish.  Large scores no longer stop
   the program.

   The program still stops when a score is not a finite number. */

{

double sum = 0.0, copie = 0.0, max_score = -HUGE_VAL;

mult_mat_vect(weight, x_i, output, ops, dim_input);

for(i=1; i<=ops; i++)
  if(isinf(output[i]) || isnan(output[i]))
    {
    printf("\noutput after update = %lf\n", output[i]);
    exit(EXIT_FAILURE);
    }

add_vects(output, bias, ops);

/* Largest score, used as the shift of the exponents */
for(i=1; i<=ops; i++)
  if(output[i] > max_score)
    max_score = output[i];

for(i=1; i<=ops; i++)
  {
  copie = output[i];
  output[i] = exp(copie - max_score);

  /* Only reachable with a non-finite score (e.g. a corrupted bias):
     the difference above is then not a number */
  if(isinf(output[i]) || isnan(output[i]))
    {
    printf("\noutput before and after exponentation: %lf and %lf\n", 
           copie, output[i]);
    exit(EXIT_FAILURE);
    }

  sum += output[i];
  }

for(i=1; i<=ops; i++)
  {
  output[i] /= sum;
  if(isnan(output[i]))
    {
    printf("\nOutput is not a number, sum is %lf\n", sum);
    exit(EXIT_FAILURE);
    }
  }

}

void compute_dcdxl()

/* Computation of the partial derivatives dc/dxl

   Fills dcdxl, the vector later multiplied by the gradient step in
   modif_weight() and modif_bias(), from the outputs of the model and the
   target vector of the current example. */

{

/* NOTE(review): presumably dcdxl = output - des over the indices 1..ops;
   sub_vects() is not defined in this file -- confirm the order of the
   operands against its definition */
sub_vects(output, des, dcdxl, ops);

}

void modif_weight()

/* Gradient step on the weights: weight[i][j] is decreased by
   epsilon * dcdxl[i] * x_i[j].  The program stops as soon as an updated
   weight is not a number.  The shared counters i and j are used as loop
   indices. */

{

double *row, step;

i = 1;
while(i <= ops)
  {
  row = weight[i];
  step = epsilon * dcdxl[i];    /* common to the whole row */

  for(j=1; j<=dim_input; j++)
    {
    row[j] = row[j] - step * x_i[j];

    if(!isnan(row[j]))
      continue;

    printf("\nProblem located...\n");
    printf("\nderivative: %lf, predictor: %lf\n", dcdxl[i], x_i[j]);
    exit(0);
    }

  i++;
  }

}
 
void modif_bias()

/* Gradient step on the biases: bias[i] is decreased by
   epsilon * dcdxl[i].  The shared counter i is used as loop index. */

{

i = 1;
while(i <= ops)
  {
  bias[i] = bias[i] - epsilon * dcdxl[i];
  i++;
  }

}

void compute_cost()

/* Contribution of the example `exemple` to the objective function
   (cross entropy): the logarithm of the output of its category is
   subtracted from cost.  The program stops when that output is not
   strictly positive. */

{

double proba = output[y[exemple]];

if(!(proba > 0.0))
  {
  printf("\nExample: %ld, objective function is infinite...\n\n",
         exemple);
  exit(0);
  }

cost -= log(proba);

}

void eval_rec()

/* Classification of the example `exemple`: the predicted category is the
   first one with the largest output (it stays 0 when no output exceeds
   0.0).  The Q outputs are written on one line of the stream f1,
   separated by blanks, and the cell of the confusion matrix indexed by
   the category of the example and the predicted one is incremented.  The
   shared counter k is used as loop index. */

{

long winner = 0;

double best = 0.0;

char separator;

/* Search of the largest output */
k = 1;
while(k <= Q)
  {
  if(best < output[k])
    {
    best = output[k];
    winner = k;
    }
  k++;
  }

/* One line of outputs, ended by a newline after the last one */
k = 1;
while(k <= Q)
  {
  if(k == Q)
    separator = '\n';
  else
    separator = ' ';

  fprintf(f1, "%lf%c", output[k], separator);
  k++;
  }

mat_conf[y[exemple]][winner] += 1;

}

void learning_proc()

/* One training pass: the gradient step is first multiplied by 0.998 as
   long as it has not dropped below epsilon_min, then nb_data updates of
   the weights and biases are performed, each one on the example selected
   by compute_xi(). */

{

if(epsilon_min <= epsilon)
  epsilon = epsilon * 0.998;

exemple = 1;
while(exemple <= nb_data)
  {
  /* Forward computation on the selected example */
  compute_xi();
  compute_des();
  compute_outputs();

  /* Gradient step */
  compute_dcdxl();
  modif_weight();
  modif_bias();

  exemple++;
  }

}

void network_use()

/* Assessment of the network on a test set

   For each example of the data set, in order: computes the outputs,
   accumulates the cross entropy in cost, writes the outputs in the file
   of computed outputs and updates the confusion matrix.

   The program stops with a message when the file of outputs cannot be
   created or cannot be written completely. */

{

if((f1=fopen(fichier_output, "w"))==NULL)
  {
  printf("\nFile of computed outputs: %s cannot be created...\n",
         fichier_output);
  exit(EXIT_FAILURE);
  }

for(exemple=1; exemple<=nb_data; exemple++)
  {
  compute_xi();
  compute_des();
  compute_outputs();
  compute_cost();
  eval_rec();
  }

/* Buffered write errors (e.g. a full disk) are only reported here */
if(fclose(f1) != 0)
  {
  printf("\nFile of computed outputs: %s could not be written completely...\n",
         fichier_output);
  exit(EXIT_FAILURE);
  }

}

void write_matrixes()

/* Save of network weights

   Writes, one value per line, all the weights row by row and then the
   biases in the file of final weights.  This is the layout read back by
   weights_init().

   The values are written with 17 significant digits, so that reading
   the file back restores exactly the same doubles; the fixed notation
   with six decimals loses small weights entirely.  The "%lf" conversion
   of fscanf() accepts both notations.

   The program stops with a message when the file cannot be created or
   cannot be written completely. */

{

if((f1=fopen(fichier_matrix_fin,"w"))==NULL)
  {
  printf("\nFile of final weights: %s cannot be created...\n",
         fichier_matrix_fin);
  exit(EXIT_FAILURE);
  }

for(i=1; i<=ops; i++)
  for(j=1; j<=dim_input; j++)
    fprintf(f1,"%.17g\n", weight[i][j]);

for(i=1; i<=ops; i++)
  fprintf(f1,"%.17g\n", bias[i]);

/* Buffered write errors (e.g. a full disk) are only reported here */
if(fclose(f1) != 0)
  {
  printf("\nFile of final weights: %s could not be written completely...\n",
         fichier_matrix_fin);
  exit(EXIT_FAILURE);
  }

}
