#!/bin/sh
#
# The purpose of this Linux shell script is to run aleph with 10 fold cross validation
# Author: Jose Santos <jcas81@gmail.com> 19 June 2008
#
# It assumes the following files are in the root directory:
#   aleph.pl
#
#  It is also required for the problems to solve to be in the directories in the root
#  The structure of each of these directories is:
#    problem_dir
#        background prolog files (namely a modes.b must exist)
#        a sub-directory folds with files train#.? and test#.? where # varies from 1..10
#        and ? is f for positive examples and n for negative examples
#
#  10 fold cross validated test accuracies and times are printed to stdout. Time to build the model with all the data is also printed
#
#  Usage example: run_aleph datasets/classical/carcinogenesis log_carcino.txt
#

# Validate the command line: the problem directory ($1) and the log file
# name ($2) are both required.  The operands are quoted so that an empty or
# whitespace-containing argument is tested as a single word.  Diagnostics go
# to stderr (stdout carries the results) and the script exits with a
# non-zero status so callers can detect the usage error.
if test -z "$1"
then
  echo First argument not defined. Should be directory name. >&2
  exit 1
fi
if test -z "$2"
then
  echo Second argument not defined. Should be log file name. >&2
  exit 1
fi
# Run Aleph (through YAP) once per cross-validation fold and once on all the
# data, appending YAP's output to the log file, then print a summary that is
# extracted from the log.
#   $1 - problem directory (must contain a folds/ sub-directory)
#   $2 - log file; it is overwritten
# Aleph/problem.pl is loaded relative to the current working directory.
LOG_FILE=$2
# Opening part of a single-quoted Prolog atom naming a file inside the folds
# directory; every use below appends a file name and the closing quote.
# NOTE: a directory name that itself contains a single quote would produce a
# malformed atom.
D="'$1/folds/"
YAP=/homes/jcs06/bin/yap6/bin/yap
# Start from a fresh log (it begins with a single blank line).
echo > "$LOG_FILE"
for i in 1 2 3 4 5 6 7 8 9 10
do
  echo "Fold $i"
  T="${D}train$i'"  # training data without the extension
  B="read_background('$1/aleph'),read_examples($T,$T),"
  C="set(test_pos,${D}test$i.f'),set(test_neg,${D}test$i.n'),run."
  # A is never assigned in this script; ${A-} keeps honouring a goal prefix
  # inherited from the environment and is empty otherwise.
  # printf is used instead of echo so backslashes in the goal are never
  # interpreted, whichever shell /bin/sh happens to be.
  printf '%s\n' "${A-}$B$C" | "$YAP" -q -l Aleph/problem.pl >> "$LOG_FILE"
done
echo Using all data for training
V="read_background('$1/aleph'),read_examples(${D}train_all',${D}train_all'),run."
printf '%s\n' "${A-}$V" | "$YAP" -q -l Aleph/problem.pl >> "$LOG_FILE"
# Of the log lines containing "Accuracy", keep every second one (2nd, 4th,
# ...) and print their second "="-separated field.  Presumably these are the
# test-set accuracies, the odd ones being training accuracies - confirm
# against Aleph's output.
awk -F= '/Accuracy/ && ++n % 2 == 0 { print $2 }' "$LOG_FILE"
# Print the second ":"-separated field of every line mentioning "cputime".
awk -F: '/cputime/ { print $2 }' "$LOG_FILE"
