#!/usr/local/bin/perl
use strict;
use Cwd;

#-------------------------------------------------------------------------------
# Synopsis:
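#    epochs.pl [ -v ] [ -vv ] [ -s seed ] [ -e nmb_epochs ]
#	[ <perc> | -u <perc/class> | -n <nb/class> | -nk <nb/cl> | -nd <nb/cl> ]
#	<file> <directory>
#
#    Options are recognised only in the order shown; -e defaults to 1.
#    NOTE: the argument check requires exactly three positional arguments,
#    so at present only the plain <perc> form is accepted.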
#
# Description:
#	create several epochs for training by calling shuffle
#	then incrementally train with these epochs by calling lcs
#	
# Example:
#	epochs.pl -e 8 80 doc-list ohra
#-------------------------------------------------------------------------------

# Parse the leading options.
#
# Each option is looked for exactly once, in the fixed order -v, -vv, -s, -e.
# Everything that follows stays in @ARGV.  $args accumulates the option
# string that is later combined with the positional arguments.

my $verbose ;
my $args    = "";
my $epochs  = 1;
my $seed;
my $hasseed = 0;

# -v: sets $verbose only; nothing is appended to $args.
if (@ARGV && $ARGV[0] eq "-v") {
    shift @ARGV;
    $verbose = "-v";
}

# -vv: sets $verbose to "-vv" and appends a single "-v" to $args.
if (@ARGV && $ARGV[0] eq "-vv") {
    shift @ARGV;
    $verbose = "-vv";
    $args .= " -v";
}

# -s <seed>: the value is taken as-is and appended to $args.
if (@ARGV && $ARGV[0] eq "-s") {
    shift @ARGV;
    $seed    = shift @ARGV;
    $hasseed = 1;
    $args .= " -s $seed";
}

# -e <epochs>: number of epochs (default 1), also appended to $args.
if (@ARGV && $ARGV[0] eq "-e") {
    shift @ARGV;
    $epochs = shift @ARGV;
    $args .= " -e $epochs";
}

# Exactly three positional arguments must remain once the options have been
# consumed: <perc> <file> <directory>.
if (@ARGV != 3)
{ print STDERR "Usage: epochsMplus.pl [ -v ] [-s seed ] -e epoch_number\n";
  print STDERR "	<perc> <file> <directory>\n";
  exit 1;
}

my $percentage = $ARGV[0];
my $file = $ARGV[1];
my $directory = $ARGV[2];

# The positional arguments are appended unchanged to the option string.
$args="$args $percentage $file $directory";

# Accept only a plain decimal number (optional sign, fraction and exponent).
# The former test "== 0 && ne '0'" rejected valid spellings of zero such as
# "0.0" and let strings with a numeric prefix ("80abc") through.
if ($percentage !~ /\A[+-]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?\z/)
  { die "error: not a number `$percentage'\n"; }
check_file_exists($file);
check_dir_exists($directory);
if( $directory eq ".")
  { $directory=cwd(); }

# $dir is the last path component of $directory.  Trailing slashes are
# removed first; otherwise "ohra/" would yield an empty name.
my $dir=$directory;
$dir =~ s/\/+\z//;
$dir =~ s/.*\///;

my $time = time;

# MAKE TRAIN- AND TESTSET
# shuffle.pl receives the options and positional arguments collected in $args.
# NOTE(review): the loop below reads $directory/train1 .. train<epochs>, so
# shuffle.pl is presumably what creates them - confirm.
esystem ("shuffle.pl $args\n");
esystem ("rm -f $directory/train");

# One result record per epoch is written to learn.dat.
# "or" is required here: with "||" the die was attached to the (always true)
# file name string, so a failing open went unnoticed.
my $learn_file = "$directory/learn.dat";
open(my $out, '>', $learn_file)
			or die "can't open $learn_file: $!\n";


for(my $i=1;$i<=$epochs;$i++)
{ # TRAIN ONE EPOCH
  $time = time;
  # Append this epoch's portion to the cumulative training file, then run lcs.
  esystem ("cat $directory/train$i >>$directory/train\n");
  esystem ("lcs  $verbose $directory\n");

  # SAVE THE DATA
  # $stats: lines of lcs.log that contain "training" and $dir, reduced by the
  # sed filters (text up to "with " and from "activ" on is removed).
  my $stats=qx?grep training lcs.log|grep $dir|sed -e "s/.*with //"|sed -e "s/ documents: /  /"|sed -e "s/activ.*//"|sed -e "s/   */  /"?;
  chomp $stats;
  # $res: the "Total" line(s) of lcs.log without the label and percent signs.
  # It is not chomped, so it normally still ends with a newline.
  my $res=qx?grep "^Total" lcs.log|sed -e "s/Total */ /"|sed -e "s/%//g"?;
  esystem("mv lcs.log lcs.log.$i");

  # Measure once so that the screen and the file report the same duration.
  my $elapsed = time - $time;
  print "$elapsed $stats $res\n";

  # Guarantee one record per line even when no "Total" line was found
  # (empty $res); otherwise consecutive records would run together.
  my $record = "$elapsed $stats $res";
  $record .= "\n" unless $record =~ /\n\z/;
  print $out $record;
}

# Buffered write errors only surface at close, so check it.
close($out) or die "can't close $learn_file: $!\n";
esystem ("gnuplot <learn.plot");

# Abort the script unless the argument names an existing plain file.
#   $file - path to test with -f
# Dies with "error: file `...' does not exist" otherwise.
sub check_file_exists {
    my ($path) = @_;
    unless (-f $path) {
        die "error: file `$path' does not exist\n";
    }
}

# Abort the script unless the argument names an existing directory.
#   $dir - path to test with -d
# Dies with "error: directory `...' does not exist" otherwise.
sub check_dir_exists {
    my ($path) = @_;
    unless (-d $path) {
        die "error: directory `$path' does not exist\n";
    }
}

# Run a command with system() and abort when it cannot be executed.
#
# Parameters:
#   $cmd - command line, passed to system() as a single string
# Returns:
#   the raw wait status reported by system() (0 on success)
# Dies:
#   when the command could not be started (system() returns -1), or when the
#   exit status is 126 or 127, which is how a POSIX shell reports "found but
#   not executable" and "command not found".
#
# A command that runs but ends with any other non-zero status only triggers
# a warning, so pipelines that worked before keep running.
sub esystem
{ my($cmd) = @_;
#  if ($verbose eq "-vv")
#    { print STDERR "$cmd\n"; }

  # The former "system $cmd || die ..." parsed as system($cmd || die ...),
  # and system() returns 0 on success anyway, so the die could never fire.
  my $status = system($cmd);
  if ($status == -1)
    { die "error: cannot execute `$cmd': $!\n"; }

  my $exit = $status >> 8;
  if ($exit == 126 || $exit == 127)
    { die "error: cannot execute `$cmd'\n"; }

  if ($status != 0)
    { warn "warning: `$cmd' returned status $status\n"; }
  return $status;
}
