/* Copyright 2025-2025 Yann Guermeur                                        */

/* This program is free software; you can redistribute it and/or modify     */
/* it under the terms of the GNU General Public License as published by     */
/* the Free Software Foundation; either version 2 of the License, or        */
/* (at your option) any later version.                                      */

/* This program is distributed in the hope that it will be useful,          */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/* GNU General Public License for more details.                             */

/* You should have received a copy of the GNU General Public License        */
/* along with this program; if not, write to the Free Software              */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/

/*--------------------------------------------------------------------------*/
/*  Name           : process_data.c                                         */
/*  Version        : 1.0                                                    */
/*  Creation       : 08/08/25                                               */
/*  Last update    : 08/08/25                                               */
/*  Subject        : Implementation of the M-SVM of Weston and Watkins      */
/*  Module         : Data pre-processing                                    */
/*  Author         : Yann Guermeur (Yann.Guermeur@cnrs.fr)                  */
/*--------------------------------------------------------------------------*/


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include "algebre.h"
#include "biblio.h"

/* Boolean constants (integer valued). */
#define true 1
#define false 0
/* Minimum / maximum of two values. The selected argument is evaluated     */
/* twice, so do not pass expressions with side effects.                     */
#define min(a,b) ((a)<=(b)?(a):(b))
#define max(a,b) ((a)<=(b)?(b):(a))

/* Size of the character buffers that hold the file names.                  */
#define taille 81
/* Period of the progress messages: write_data() prints one message every   */
/* `step` examples.                                                         */
#define step 100

/* fs: stream used to read the file of parameters, then the file of data.   */
/* fc: stream used to write the new file of data.                           */
FILE *fs, *fc;

/* Return value of the most recent system() / fscanf() call.                */
int status;

/* X            : descriptions of the examples, allocated by matrix_l() and */
/*                accessed as X[1..nb_data][1..dim_input]                   */
/* y            : categories of the examples, accessed as y[1..nb_data]     */
/* smallest_cat : index of the category with the fewest examples            */
/* largest_cat  : index of the category with the most examples              */
/* nb_symb, l   : not used in this file                                     */
/* Q            : number of categories (third value of the data file)       */
/* i, j, k      : general-purpose loop indices                              */
/* dim_input    : number of values describing one example                   */
/* nb_data      : number of examples                                        */
long **X, *y, smallest_cat=0, largest_cat=0, nb_symb,
Q=0, i, j, k, l, dim_input, nb_data=0;

/* fichier_data     : name of the file of data to read                      */
/* fichier_data_SVM : name of the new file of data to write                 */
/* commande         : not used in this file                                 */
/* fichier_fichcom  : name of the file of parameters (first argument of the */
/*                    command line)                                         */
char fichier_data[taille], fichier_data_SVM[taille], commande[taille+20],
fichier_fichcom[taille];

/* Functions included in this program */

/* Reads the names of the input and output data files from the file of      */
/* parameters.                                                              */
void caract_db();
/* Loads the data file into X and y and prints the size of each category.   */
void read_data();
/* Writes the header and the descriptions of the examples to the new file.  */
void write_data();

/*--------------------------------------------------------------------------*/
/*  main: entry point of the pre-processing program.                        */
/*                                                                          */
/*  argv[1] must be the name of the file of parameters. The name is copied  */
/*  into fichier_fichcom, the terminal is cleared, then the parameters are  */
/*  read (caract_db), the data are loaded (read_data) and the new file of   */
/*  data is written (write_data).                                           */
/*                                                                          */
/*  Returns 0 on success; exits with EXIT_FAILURE when the argument is      */
/*  missing or does not fit in fichier_fichcom.                             */
/*--------------------------------------------------------------------------*/
int main(int argc, char *argv[])

{

/* Without this test, argv[1] is NULL (or out of bounds) when the program   */
/* is started without argument.                                             */
if(argc < 2)
  {
  printf("\nUsage: %s <file of parameters>\n",
         (argc > 0) ? argv[0] : "process_data");
  exit(EXIT_FAILURE);
  }

/* Reject a name that would overflow the fixed-size destination buffer.     */
if(strlen(argv[1]) >= sizeof fichier_fichcom)
  {
  printf("\nName of the file of parameters too long (%lu characters at most)\n",
         (unsigned long) (sizeof fichier_fichcom - 1));
  exit(EXIT_FAILURE);
  }

/* Safe: the length has just been checked against the buffer size.          */
strcpy(fichier_fichcom, argv[1]);

status = system("clear");
caract_db();
read_data();
write_data();

return 0;

}

/*--------------------------------------------------------------------------*/
/*  read_name: reads one whitespace-delimited word from `stream` into       */
/*  `dest`, a buffer of `size` bytes (size >= 2).                           */
/*                                                                          */
/*  Returns 1 when a complete word was stored, 0 when no word could be read */
/*  or when the word does not fit in `dest`.                                */
/*--------------------------------------------------------------------------*/
static int read_name(FILE *stream, char *dest, size_t size)

{

char format[32];
int next;

/* Build "%<size-1>s" so that fscanf can never write past the buffer.       */
snprintf(format, sizeof format, "%%%lus", (unsigned long) (size - 1));

if(fscanf(stream, format, dest) != 1)
  return 0;

/* A word that was cut by the field width is followed by a non-blank        */
/* character: report it instead of silently using a truncated name.         */
next = fgetc(stream);
if(next == EOF)
  return 1;

ungetc(next, stream);

return isspace(next) ? 1 : 0;

}

/*--------------------------------------------------------------------------*/
/*  caract_db: reads the characteristics of the data base.                  */
/*                                                                          */
/*  Opens the file of parameters (fichier_fichcom) and reads two words from */
/*  it: the name of the file of data (fichier_data) and the name of the new */
/*  file of data (fichier_data_SVM). Both names are echoed on the standard  */
/*  output.                                                                 */
/*                                                                          */
/*  Exits with EXIT_FAILURE when the file cannot be opened, when a name is  */
/*  missing or when a name does not fit in its buffer.                      */
/*--------------------------------------------------------------------------*/
void caract_db(void)

{

if((fs=fopen(fichier_fichcom, "r"))==NULL)
  {
  printf("\nFile of parameters: %s cannot be open...\n", fichier_fichcom);
  exit(EXIT_FAILURE);
  }

if(!read_name(fs, fichier_data, sizeof fichier_data))
  {
  printf("\nFile of parameters: %s does not provide a valid name for the file of the data\n",
         fichier_fichcom);
  exit(EXIT_FAILURE);
  }
printf("\n    The file of the data is: %s", fichier_data);

if(!read_name(fs, fichier_data_SVM, sizeof fichier_data_SVM))
  {
  printf("\nFile of parameters: %s does not provide a valid name for the new file of the data\n",
         fichier_fichcom);
  exit(EXIT_FAILURE);
  }
printf("\nThe new file of the data is: %s\n", fichier_data_SVM);

fclose(fs);

}

/*--------------------------------------------------------------------------*/
/*  abort_on_bad_data: reports a malformed file of data and terminates the  */
/*  program with EXIT_FAILURE.                                              */
/*--------------------------------------------------------------------------*/
static void abort_on_bad_data(const char *file_name, const char *reason)

{

printf("\nFile of data: %s %s\n", file_name, reason);
exit(EXIT_FAILURE);

}

/*--------------------------------------------------------------------------*/
/*  read_data: loads the file of data (fichier_data).                       */
/*                                                                          */
/*  Layout of the file: three integers (nb_data, dim_input, Q), then, for   */
/*  each example, dim_input integers followed by the index of its category. */
/*                                                                          */
/*  On return:                                                              */
/*    - X[1..nb_data][1..dim_input] holds the descriptions;                 */
/*    - y[1..nb_data] holds the categories, renumbered from 1 to Q when the */
/*      file numbers them from 0;                                           */
/*    - smallest_cat / largest_cat hold the index of the category with the  */
/*      fewest / the most examples (first one in case of a tie).            */
/*  The cardinal of every category is printed on the standard output.       */
/*                                                                          */
/*  Exits with EXIT_FAILURE when the file cannot be opened, is truncated or */
/*  malformed, when memory is exhausted, when the categories are not        */
/*  numbered 0..Q-1 or 1..Q, or when a category has no example.             */
/*--------------------------------------------------------------------------*/
void read_data(void)

{

long min_y=0, max_y=0, *cardinal_cat, cardinal_min, cardinal_max;
long example, feature, category;

if((fs=fopen(fichier_data, "r"))==NULL)
  {
  printf("\nFile of data: %s cannot be open...\n", fichier_data);
  exit(EXIT_FAILURE);
  }

/* Header: each value is read only if the previous one was valid.           */
status = fscanf(fs, "%ld", &nb_data);
if(status == 1)
  status = fscanf(fs, "%ld", &dim_input);
if(status == 1)
  status = fscanf(fs, "%ld", &Q);

if(status != 1)
  abort_on_bad_data(fichier_data, "has an incomplete or invalid header");

/* The sizes drive the allocations below: reject nonsensical values.        */
if((nb_data < 1) || (dim_input < 1) || (Q < 1))
  abort_on_bad_data(fichier_data, "declares a non-positive size");

/* Index 0 is unused: examples and categories are numbered from 1.          */
cardinal_cat = calloc((size_t) Q + 1, sizeof *cardinal_cat);

X = matrix_l(nb_data, dim_input);
y = calloc((size_t) nb_data + 1, sizeof *y);

/* NOTE(review): matrix_l() may already handle its own allocation failure;  */
/* the test on X is harmless in that case.                                  */
if((cardinal_cat == NULL) || (X == NULL) || (y == NULL))
  {
  printf("\nNot enough memory to load the data\n");
  exit(EXIT_FAILURE);
  }

for(example=1; example<=nb_data; example++)
  {
  for(feature=1; feature<=dim_input; feature++)
    {
    status = fscanf(fs, "%ld", &X[example][feature]);
    if(status != 1)
      abort_on_bad_data(fichier_data, "is truncated or contains an invalid value");
    }

  status = fscanf(fs, "%ld", &y[example]);
  if(status != 1)
    abort_on_bad_data(fichier_data, "is truncated or contains an invalid value");

  /* The extreme labels are seeded with the first example, so that they do  */
  /* not depend on an arbitrary initial value.                              */
  if((example == 1) || (y[example] < min_y))
    min_y = y[example];

  if((example == 1) || (y[example] > max_y))
    max_y = y[example];
  }

fclose(fs);

/* Accepted numberings: 0..Q-1 or 1..Q. This test also guarantees that      */
/* every label is a valid index of cardinal_cat once shifted.               */
if(((min_y != 0) && (min_y != 1)) || (max_y - min_y != Q-1))
  {
  printf("\nWrong numbering of the categories\n");
  exit(EXIT_FAILURE);
  }

if(min_y == 0)
  for(example=1; example<=nb_data; example++)
    y[example]++;

/* cardinal_cat was zeroed by calloc.                                       */
for(example=1; example<=nb_data; example++)
  cardinal_cat[y[example]]++;

/* Start from category 1 so that both indices are always set, including     */
/* when Q equals 1.                                                         */
cardinal_min = cardinal_cat[1];
cardinal_max = cardinal_cat[1];
smallest_cat = 1;
largest_cat = 1;

for(category=2; category<=Q; category++)
  {
  if(cardinal_cat[category] > cardinal_max)
    {
    cardinal_max = cardinal_cat[category];
    largest_cat = category;
    }
  if(cardinal_cat[category] < cardinal_min)
    {
    cardinal_min = cardinal_cat[category];
    smallest_cat = category;
    }
  }

for(category=1; category<=Q; category++)
  printf("\nCardinal of category %2ld: %5ld", category, cardinal_cat[category]);

printf("\n\nIndex of the smallest category: %2ld", smallest_cat);
printf("\n Index of the largest category: %2ld\n", largest_cat);

if(cardinal_min == 0)
  {
  printf("\nNo example for category %2ld\n", smallest_cat);
  exit(EXIT_FAILURE);
  }

/* The counts are only needed for the report above.                         */
free(cardinal_cat);

}

/*--------------------------------------------------------------------------*/
/*  write_data: writes the new file of data (fichier_data_SVM).             */
/*                                                                          */
/*  Layout of the file: nb_data, dim_input and Q on one line each, then one */
/*  line per example holding its dim_input values separated by spaces.      */
/*  A progress message is printed every `step` examples.                    */
/*                                                                          */
/*  NOTE(review): the categories y[] are not written although Q appears in  */
/*  the header (the corresponding output was disabled in the previous       */
/*  version) - confirm that this is the intended format.                    */
/*                                                                          */
/*  Exits with EXIT_FAILURE when the file cannot be opened or when a write  */
/*  error is detected.                                                      */
/*--------------------------------------------------------------------------*/
void write_data(void)

{

long example, feature;
int write_failed;

if((fc=fopen(fichier_data_SVM, "w"))==NULL)
  {
  printf("\nFile of outputs: %s cannot be open...\n", fichier_data_SVM);
  exit(EXIT_FAILURE);
  }

fprintf(fc, "%ld\n", nb_data);
fprintf(fc, "%ld\n", dim_input);
fprintf(fc, "%ld\n", Q);

for(example=1; example<=nb_data; example++)
  {
  /* The last value of an example ends the line.                            */
  for(feature=1; feature<=dim_input; feature++)
    fprintf(fc, "%2ld%c", X[example][feature], (feature==dim_input) ? '\n' : ' ');

  if((example % step) == 0)
    printf("\nExample: %5ld", example);
  }

/* Buffered output may only fail when it is flushed: check both the error   */
/* indicator of the stream and the result of fclose.                        */
write_failed = ferror(fc);
if(fclose(fc) != 0)
  write_failed = 1;

if(write_failed)
  {
  printf("\nFile of outputs: %s could not be written completely\n", fichier_data_SVM);
  exit(EXIT_FAILURE);
  }

printf("\n\n");

}
