/* Copyright 2025-2025 Yann Guermeur                                        */

/* This program is free software; you can redistribute it and/or modify     */
/* it under the terms of the GNU General Public License as published by     */
/* the Free Software Foundation; either version 2 of the License, or        */
/* (at your option) any later version.                                      */

/* This program is distributed in the hope that it will be useful,          */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/* GNU General Public License for more details.                             */

/* You should have received a copy of the GNU General Public License        */
/* along with this program; if not, write to the Free Software              */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/

/*--------------------------------------------------------------------------*/
/*  Name           : apply_SVM.c                                            */
/*  Version        : 1.0                                                    */
/*  Creation       : 08/08/25                                               */
/*  Last update    : 08/08/25                                               */
/*  Subject        : Implementation of the M-SVM of Weston and Watkins      */
/*  Module         : M-SVM applied to unlabeled data                        */
/*  Author         : Yann Guermeur (Yann.Guermeur@cnrs.fr)                  */
/*--------------------------------------------------------------------------*/


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include "algebre.h"
#include "biblio.h"

/* Boolean constants and min/max helpers.  None of them is referenced in    */
/* the visible part of this file.  The min/max macros evaluate one of       */
/* their arguments twice: do not pass expressions with side effects.        */
#define true 1
#define false 0
#define min(a,b) ((a)<=(b)?(a):(b))
#define max(a,b) ((a)<=(b)?(b):(a))

/* taille     : size in bytes (terminator included) of the file name buffers */
/* very_small : threshold on the norm of the deviation of the equality       */
/*              constraints, used by check_feasible_sol()                    */
/* step       : compute_outputs() prints a progress line every step examples */
#define taille 81
#define very_small 1e-4
#define step 100

/* fs: stream used for every input file; fc: stream of the output file      */
/* written by compute_outputs().                                            */
FILE *fs, *fc;

/* Receives the return values of system() and fscanf(); never tested in     */
/* this file.                                                               */
int status;

/* X_app, X_test : descriptions of the training and test examples,          */
/*                 allocated with matrix_l() and indexed from 1             */
/* y_app         : categories of the training examples, renumbered from 1   */
/*                 by read_data()                                           */
/* smallest_cat, largest_cat : categories with the fewest / the most        */
/*                 training examples (set by read_data())                   */
/* nb_symb       : first value of the substitution matrix file; also the    */
/*                 code substituted for a 0 in the data files               */
/* Q             : number of categories                                     */
/* dim_input     : number of components of an example                       */
/* nb_data_app, nb_data_test : sizes of the training and test sets          */
/* i, j, k, l    : loop counters shared by all the functions of this file   */
/* y_i, y_j      : not referenced in the visible part of this file          */
long **X_app, **X_test, *y_app, smallest_cat=0, largest_cat=0, nb_symb,
Q=0, i, j, k, l, dim_input, nb_data_app=0, nb_data_test=0, y_i, y_j;

/* File names read from the parameter file by caract_db() (fichier_fichcom  */
/* itself comes from the command line).  commande holds the shell command   */
/* built by check_feasible_sol().  var_open and var_close are not           */
/* referenced in the visible part of this file.                             */
char fichier_app[taille],
fichier_alpha[taille], fichier_dot_prod_aa[taille], fichier_b[taille],
fichier_test[taille], commande[taille+20], fichier_vect_theta[taille],
fichier_fichcom[taille], fichier_resultat[taille], var_open = '(',
var_close = ')';

/* alpha       : dual variables, one row per training example and one       */
/*               column per category (indexed from 1)                       */
/* b_SVM       : vector b, one component per category, added to the outputs */
/* C           : first value of the parameter file; upper bound of the dual */
/*               variables in check_feasible_sol()                          */
/* partiel     : scratch coefficient used by fonction()                     */
/* max_alpha   : largest dual variable found by read_alpha()                */
/* vect_theta  : positional weighting passed to gaussian()                  */
/* dot_prod_aa : substitution matrix passed to gaussian()                   */
double **alpha, *b_SVM, C=0.0, partiel, max_alpha=0.0,
*vect_theta, **dot_prod_aa;

/* Functions defined in this file.  The readers fill the global variables   */
/* declared above.  check_feasible_sol() is defined but main() does not     */
/* call it.  The empty parameter lists are old-style declarations: the      */
/* compiler does not check the arguments of these calls.                    */

void caract_db();
void read_data();
void alloc_memory();
void read_alpha();
void check_feasible_sol();
void read_b();
void read_vect_theta();
void read_dot_prod_aa();
double fonction(long categorie, long *vecteur);
void compute_outputs(long **X, long nb_data);

/* Entry point.  Usage: <program> parameter_file                            */
/* argv[1] is the path of the parameter file parsed by caract_db().         */
/* The substitution matrix is read before the data because read_data()      */
/* uses nb_symb, which is set by read_dot_prod_aa().                        */
/* Returns 0 on success and EXIT_FAILURE when the argument is missing or    */
/* does not fit in fichier_fichcom.                                         */

int main(int argc, char *argv[])

{

if(argc < 2)
  {
  printf("\nUsage: %s parameter_file\n\n",
         (argc > 0) ? argv[0] : "apply_SVM");
  return EXIT_FAILURE;
  }

/* fichier_fichcom is a fixed-size buffer: refuse a path that would */
/* overflow it instead of copying blindly.                          */
if(strlen(argv[1]) >= sizeof fichier_fichcom)
  {
  printf("\nName of the file of parameters too long (%lu characters max.)\n\n",
         (unsigned long) (sizeof fichier_fichcom - 1));
  return EXIT_FAILURE;
  }

strcpy(fichier_fichcom, argv[1]);

status = system("clear");
caract_db();
read_dot_prod_aa();
read_data();
alloc_memory();
read_alpha();
read_b();
read_vect_theta();

printf("\n\n*** Computation of the outputs\n");
compute_outputs(X_test, nb_data_test);

return 0;

}

/* Reads the Q components of vector b from the file fichier_b into          */
/* b_SVM[1..Q].  Prints a message and exits if the file cannot be opened.   */
/* The result of each fscanf() is stored in status but not examined.        */

void read_b()

{

fs = fopen(fichier_b, "r");

if(fs == NULL)
  {
  printf("\nFile of vector b: %s cannot be open...\n", fichier_b);
  exit(0);
  }

/* The global counter k is used, as everywhere else in this file. */
k = 1;

while(k <= Q)
  {
  status = fscanf(fs, "%lf", b_SVM + k);
  k++;
  }

fclose(fs);

}

/* Reads one whitespace-delimited word from f into dst, whose capacity is   */
/* size bytes (terminator included).  Returns 1 on success, 0 on end of     */
/* file, on a read error or when the word does not fit in dst.              */

static int read_bounded_word(FILE *f, char *dst, size_t size)

{

char format[32];
int next;

if(size < 2)
  return 0;

/* Build "%<size-1>s" so that fscanf() can never write past dst. */
snprintf(format, sizeof format, "%%%lus", (unsigned long) (size - 1));

if(fscanf(f, format, dst) != 1)
  return 0;

/* A non-blank character right after the word means that the word was */
/* cut at the field width.                                            */
next = fgetc(f);

if(next == EOF)
  return 1;

ungetc(next, f);

return isspace((unsigned char) next) ? 1 : 0;

}

/* Reads a file name from f into dst; prints a message naming the missing   */
/* field (role) of the parameter file (source) and exits on failure.        */

static void read_name_or_exit(FILE *f, const char *source, const char *role,
                              char *dst, size_t size)

{

if(!read_bounded_word(f, dst, size))
  {
  printf("\nFile of parameters: %s: name of the %s missing or longer than %lu characters\n",
         source, role, (unsigned long) (size - 1));
  exit(EXIT_FAILURE);
  }

}

/* Parses the parameter file fichier_fichcom.  Its fields are, in order:    */
/* the value of C, then the names of the training set, test set, dual       */
/* variables, positional weighting, substitution matrix, vector b and       */
/* output files.  Every name is read with a field width matching its        */
/* buffer, and a missing or oversized field stops the program with          */
/* EXIT_FAILURE.  The pre-existing "cannot be open" path keeps exit(0).     */

void caract_db()

{

if((fs=fopen(fichier_fichcom, "r"))==NULL)
  {
  printf("\nFile of parameters: %s cannot be open...\n", fichier_fichcom);
  exit(0);
  }

status = fscanf(fs, "%lf", &C);

if(status != 1)
  {
  printf("\nFile of parameters: %s: the value of C cannot be read\n",
         fichier_fichcom);
  exit(EXIT_FAILURE);
  }

read_name_or_exit(fs, fichier_fichcom, "training set file",
                  fichier_app, sizeof fichier_app);
printf("\nThe file of the training set is: %s", fichier_app);
read_name_or_exit(fs, fichier_fichcom, "test set file",
                  fichier_test, sizeof fichier_test);
printf("\n    The file of the test set is: %s\n", fichier_test);

read_name_or_exit(fs, fichier_fichcom, "file of dual variables",
                  fichier_alpha, sizeof fichier_alpha);
read_name_or_exit(fs, fichier_fichcom, "file of the positional weighting",
                  fichier_vect_theta, sizeof fichier_vect_theta);
read_name_or_exit(fs, fichier_fichcom, "file of the substitution matrix",
                  fichier_dot_prod_aa, sizeof fichier_dot_prod_aa);
read_name_or_exit(fs, fichier_fichcom, "file of vector b",
                  fichier_b, sizeof fichier_b);
read_name_or_exit(fs, fichier_fichcom, "file of outputs",
                  fichier_resultat, sizeof fichier_resultat);

fclose(fs);

}

void read_data()

{

long min_y, ind_min_y, max_y, ind_max_y, *cardinal_cat, cardinal_min, 
cardinal_max=0;

if((fs=fopen(fichier_app, "r"))==NULL)
  {
  printf("\nFile of data: %s cannot be open...\n", fichier_app);
  exit(0);
  }

status = fscanf(fs, "%ld", &nb_data_app);
status = fscanf(fs, "%ld", &dim_input);
status = fscanf(fs, "%ld", &Q);

cardinal_cat = (long *) calloc(Q+1, sizeof(long));

X_app = matrix_l(nb_data_app, dim_input);
y_app = (long *) calloc(nb_data_app+1, sizeof(long));

min_y = nb_data_app;
max_y = 0;

for(i=1; i<=nb_data_app; i++)
  {
  for(j=1; j<=dim_input; j++)
    {
    status = fscanf(fs, "%ld", &X_app[i][j]);
    if(X_app[i][j] == 0)
      X_app[i][j] = nb_symb;
    }
  status = fscanf(fs, "%ld", &y_app[i]);
  if(y_app[i] < min_y)
    {
    min_y = y_app[i];
    ind_min_y = i;
    }
  if(y_app[i] > max_y)
    {
    max_y = y_app[i];
    ind_max_y = i;
    }
  }

fclose(fs);

if((fs=fopen(fichier_test, "r"))==NULL)
  {
  printf("\nFile of data: %s cannot be open...\n", fichier_test);
  exit(0);
  }

status = fscanf(fs, "%ld", &nb_data_test);
status = fscanf(fs, "%ld", &dim_input);
status = fscanf(fs, "%ld", &Q);

X_test = matrix_l(nb_data_test, dim_input);

for(i=1; i<=nb_data_test; i++)
  for(j=1; j<=dim_input; j++)
    {
    status = fscanf(fs, "%ld", &X_test[i][j]);
    if(X_test[i][j] == 0)
      X_test[i][j] = nb_symb;
    }

fclose(fs);

/*
printf("\nExtreme indices of categories : %2ld -> %2ld\n", min_y, max_y);
*/

if(((min_y != 0) && (min_y != 1)) || (max_y - min_y != Q-1))
  {
  printf("\nWrong numbering of the categories\n");
  exit(0);
  }

if(min_y == 0)
  for(i=1; i<=nb_data_app; i++)
    y_app[i]++;

for(k=1; k<=Q; k++)
  cardinal_cat[k] = 0;

for(i=1; i<=nb_data_app; i++)
  cardinal_cat[y_app[i]]++;

cardinal_min = nb_data_app;

for(k=1; k<=Q; k++)
  {
  if(cardinal_cat[k] > cardinal_max)
    {
    cardinal_max = cardinal_cat[k];
    largest_cat = k;
    }
  if(cardinal_cat[k] < cardinal_min)
    {
    cardinal_min = cardinal_cat[k];
    smallest_cat = k;
    }
  }
  
/*
for(k=1; k<=Q; k++)
  printf("\nCardinal of category %2ld: %5ld", k, cardinal_cat[k]);

printf("\nIndex of the smallest category: %2ld", smallest_cat);
printf("\nIndex of the largest category: %2ld", largest_cat);
Pause("");
*/

if(cardinal_min == 0)
  {
  printf("\nNo training example for category %2ld\n", smallest_cat);
  exit(0);
  }

}

/* Allocates the parameters of the M-SVM: alpha (nb_data_app x Q, through   */
/* matrix()), vect_theta (dim_input+1 entries) and b_SVM (Q+1 entries), the */
/* last two being zero-initialized by calloc() and indexed from 1.          */
/* Must be called after read_data(), which sets nb_data_app, dim_input      */
/* and Q.  Stops the program with EXIT_FAILURE if calloc() fails.           */
/* NOTE(review): the behavior of matrix() on allocation failure is not      */
/* visible from this file.                                                  */

void alloc_memory()

{

alpha = matrix(nb_data_app, Q);
vect_theta = (double *) calloc(dim_input+1, sizeof(double));
b_SVM = (double *) calloc(Q+1, sizeof(double));

if((vect_theta == NULL) || (b_SVM == NULL))
  {
  printf("\nNot enough memory for the parameters of the M-SVM\n");
  exit(EXIT_FAILURE);
  }

}

/* Reads the nb_data_app x Q dual variables from fichier_alpha into alpha   */
/* and records the largest one in max_alpha.                                */
/* alpha is allocated here only if it has not been allocated yet, so that   */
/* a matrix already set up by alloc_memory() is reused instead of leaked.   */
/* The program stops if the file cannot be opened, if a value cannot be     */
/* read (EXIT_FAILURE), or if the dummy variable of an example (the one     */
/* indexed by its own category) is not zero.                                */
/* A message is printed when no dual variable reaches C.                    */

void read_alpha()

{

max_alpha = 0.0;

if((fs=fopen(fichier_alpha, "r"))==NULL)
  {
  printf("\nFile of dual variables: %s cannot be open...\n", 
  fichier_alpha);
  exit(0);
  }

if(alpha == NULL)
  alpha = matrix(nb_data_app, Q);

for(i=1; i<=nb_data_app; i++)
  for(k=1; k<=Q; k++)
    {
    status = fscanf(fs, "%lf", &alpha[i][k]);
    /* A short or corrupted file would otherwise leave entries unset. */
    if(status != 1)
      {
      printf("\nFile of dual variables: %s: alpha[%ld][%ld] cannot be read\n\n",
             fichier_alpha, i, k);
      exit(EXIT_FAILURE);
      }
    if((alpha[i][k] != 0.0) && (k == y_app[i]))
      {
      printf("\nEx. %ld: pb. with the dummy variable...\n\n", i);
      exit(0);
      }
    if(alpha[i][k] > max_alpha)
      max_alpha = alpha[i][k];
    }

fclose(fs);

if(max_alpha < C)
  printf("\nMaximum value of a dual variable, %e, inferior to C (= %e) \n", max_alpha, C);

}

/* Checks that alpha is a feasible point of the dual problem:               */
/*  - every alpha[i][k] lies in [0, C];                                     */
/*  - for each category k, the sum over the examples of other categories    */
/*    of alpha[i][k], minus the sum over the examples of category k of all  */
/*    their dual variables, is close to zero.  The Q deviations are         */
/*    printed and the Euclidean norm of their vector must be below          */
/*    very_small.                                                           */
/* On failure a message is printed and the program stops.  On success the   */
/* file of dual variables is copied into the directory Feasible/ with the   */
/* shell command cp.                                                        */
/* NOTE(review): fichier_alpha is inserted unquoted in a shell command; a   */
/* name containing shell metacharacters would be interpreted by the shell.  */

void check_feasible_sol()

{

double *constraints, norm;
int length;

for(i=1; i<=nb_data_app; i++)
  for(k=1; k<=Q; k++)
    if((alpha[i][k] < 0.0) || (alpha[i][k] > C))
       {
       printf("\nNo feasible solution: alpha[%ld][%ld] = %lf\n\n",
              i, k, alpha[i][k]);
       exit(0);
       }

/* calloc() already sets the Q+1 accumulators to zero. */
constraints = (double *) calloc(Q+1, sizeof(double));

if(constraints == NULL)
  {
  printf("\nNot enough memory to check the equality constraints\n");
  exit(EXIT_FAILURE);
  }

for(i=1; i<=nb_data_app; i++)
  for(k=1; k<=Q; k++)
     {
     if(y_app[i] == k)
       for(l=1; l<=Q; l++)
         constraints[k] -= alpha[i][l];
     else
       constraints[k] += alpha[i][k];  
     }

printf("\n\nSatisfaction of the equality constraints:\n\n");

for(k=1; k<=Q; k++)
  printf("%11.8f\n", constraints[k]);

norm = 0.0;

for(k=1; k<=Q; k++)
  norm += constraints[k] * constraints[k];

norm = sqrt(norm);

/* Only the norm is needed from here on. */
free(constraints);

if(norm >= very_small)
  {
  printf("\nLarge deviation of the equality constraints...\n");
  exit(0);
  }

length = snprintf(commande, sizeof commande, "cp %s Feasible/.",
                  fichier_alpha);

if((length < 0) || ((size_t) length >= sizeof commande))
  {
  printf("\nName of the file of dual variables too long: %s\n",
         fichier_alpha);
  exit(EXIT_FAILURE);
  }

status = system(commande);

}

/* Reads the dim_input components of the positional weighting from the      */
/* file fichier_vect_theta into vect_theta[1..dim_input].  Prints a message */
/* and exits if the file cannot be opened; otherwise prints the name of     */
/* the file.  The result of each fscanf() is stored in status but not       */
/* examined.                                                                */

void read_vect_theta()

{

fs = fopen(fichier_vect_theta, "r");

if(fs == NULL)
  {
  printf("\nFile of the positional weighting %s: cannot be open...\n",
  fichier_vect_theta);
  exit(0);
  }

printf("\nThe file of the positional weighting is: %s", fichier_vect_theta);

/* The global counter i is used, as everywhere else in this file. */
i = 1;

while(i <= dim_input)
  {
  status = fscanf(fs, "%lf", vect_theta + i);
  i++;
  }

fclose(fs);

}

/* Reads the substitution matrix from the file fichier_dot_prod_aa: first   */
/* the integer nb_symb, then nb_symb x nb_symb values stored row by row in  */
/* dot_prod_aa (allocated here with matrix(), indexed from 1).  Prints a    */
/* message and exits if the file cannot be opened; otherwise prints the     */
/* name of the file.  The results of fscanf() are stored in status but not  */
/* examined.                                                                */

void read_dot_prod_aa()

{

fs = fopen(fichier_dot_prod_aa, "r");

if(fs == NULL)
  {
  printf("\nFile of the substitution matrix %s: cannot be open...\n",
  fichier_dot_prod_aa);
  exit(0);
  }

printf("\n The file of the substitution matrix is: %s", fichier_dot_prod_aa);

status = fscanf(fs, "%ld", &nb_symb);
dot_prod_aa = matrix(nb_symb, nb_symb);

/* The global counters i and j are used, as everywhere else in this file. */
i = 1;

while(i <= nb_symb)
  {
  j = 1;

  while(j <= nb_symb)
    {
    status = fscanf(fs, "%lf", dot_prod_aa[i] + j);
    j++;
    }

  i++;
  }

fclose(fs);

}

/* Computes the output of the M-SVM for category categorie on the example   */
/* vecteur, without the component of vector b (added by the caller).        */
/* Each training example contributes its coefficient times the value of     */
/* gaussian() between this example and vecteur.  The coefficient is the     */
/* sum of the Q dual variables of the example when it belongs to            */
/* categorie, and minus alpha[example][categorie] otherwise.  Examples      */
/* with a zero coefficient are skipped.                                     */
/* The global variable partiel is used as scratch storage.                  */

double fonction(long categorie, long *vecteur)

{

double somme = 0.0;

long exemple, colonne;

for(exemple=1; exemple<=nb_data_app; exemple++)
  {
  if(y_app[exemple] != categorie)
    partiel = - alpha[exemple][categorie];
  else
    {
    partiel = 0.0;
    for(colonne=1; colonne<=Q; colonne++)
      partiel += alpha[exemple][colonne];
    }

  /* No kernel evaluation for an example that does not contribute. */
  if(partiel == 0.0)
    continue;

  somme += partiel * gaussian(X_app[exemple], vecteur, vect_theta, dot_prod_aa, dim_input);
  }

return somme;

}

/* Computes the Q outputs of the M-SVM for each of the nb_data examples of  */
/* X (rows indexed from 1) and writes them to the file fichier_resultat,    */
/* one example per line, values separated by a space and printed with the   */
/* format %12.6f.  A progress line is printed every step examples.          */
/* The program stops if the file cannot be opened, or (EXIT_FAILURE) if     */
/* closing it reports that the outputs could not be written.                */

void compute_outputs(long **X, long nb_data)

{

/* One output is written as soon as it is computed: no array is needed. */
double output;

if((fc=fopen(fichier_resultat, "w"))==NULL)
  {
  printf("\nFile of outputs: %s cannot be open...\n", fichier_resultat);
  exit(0);
  }

for(i=1; i<=nb_data; i++)
  {
  for(k=1; k<=Q; k++)
    {
    output = fonction(k, X[i]) + b_SVM[k];
    fprintf(fc, "%12.6f%c", output, (k==Q) ? '\n' : ' ');
    }

  if((i % step) == 0)
    printf("\nExample: %5ld", i);
  }

/* fclose() flushes the buffered outputs: a failure here means that the */
/* file of outputs is incomplete.                                       */
if(fclose(fc) != 0)
  {
  printf("\nFile of outputs: %s could not be written completely\n",
         fichier_resultat);
  exit(EXIT_FAILURE);
  }

printf("\n\n");

}
