|
|
A.4 Restore.Input
#!/usr/local/bin/perl
#
# CLASPnet
#
# English SNNS results parser for my M.Sc. Dissertation
# Copyright 1995-1996. All rights reserved. Ezra Van Everbroeck
#
# 960409 : minor output format change
# 960407 : update for semantic info
# 960322 : added 'Mis' and 'Spu'
# avoid insanely large percentages
# 960321 : added 17th unit
# only 60 input units now
# 960313 : added infinite vs finite unit
# 960301 : analyze 75x15 results
# added real ID checking
# 960227 : analyze 15x15 results (part 2)
# 960226 : analyze 15x15 results (part 1)
# 960215 : more work on memory management
# 960214 : added some memory usage control
# 960212 : added .err support
# 960204 : translation to Perl of old shell + AWK scripts
#
##
# Set the global stuff
##
#$len_input = 15; # number of input units per word
$len_output = 17; # number of output units per word
@tolerances = (0, 0.1, 0.2, 0.3, 0.4, 0.5);
%name = (
"1" => "Affirmative", "2" => "Imperative" , "3" => "Interrogative",
"4" => "ID1" , "5" => "ID2" , "6" => "ID3",
"7" => "Status" , "8" => "Declarative", "9" => "Order",
"10"=> "WH-Question", "11"=> "YN-Question", "12"=> "Rel-clause",
"13"=> "Connector" , "14"=> "That-clause", "15"=> "Infinity",
"16"=> "Voice" , "17"=> "Polarity"
);
#$snns_tools_dir = "/home/admin/ezra/zin/snns/tools/bin/pc_linux";
#$snns_xgui_dir = "/home/admin/ezra/zin/snns/xgui/bin/pc_linux";
##
# Main routine
##
&intro;
&get_results;
&get_voc;
&open_rrf;
&analyze_results;
&calc_errors;
&finale;
##
# Print intro message
##
sub intro {
print <<END
+-+-+ Starting CLASPnet's SNNS English Results Analyser +-+-+
+-+-+ Copyright 1995-1996. Ezra Van Everbroeck +-+-+
END
} # end &intro
##
# Get vocabulary file
##
sub get_voc {
my($word, $pattern);
print "Use which vocabulary file [${def_file}.voc]? ";
chop($voc_file = <STDIN>);
unless ($voc_file) { $voc_file = $def_file . ".voc" };
unless ($voc_file =~ /\w+.voc$/) { $voc_file .= ".voc" };
unless (-f ${voc_file}) {
die "!! No such vocabulary file: $voc_file! Aborting ...\n\n";
} else {
open(VOC_FILE,"./${voc_file}");
while (<VOC_FILE>) {
chop;
($word, $pattern) = split(/ -> /,$_);
$pattern =~ s/(\.\d)/0$1/g;
$convert{$pattern} = $word;
}
}
close(VOC_FILE);
# print %convert;
} # end &get_voc
##
# Get results file
##
sub get_results {
print "Analyse which results file? ";
chop($results_file = <STDIN>);
unless ($results_file =~ /\w+.res$/) { $results_file .= ".res" };
($def_file = $results_file) =~ s/\.res$//;
unless (-f ${results_file}) {
die "!! No such results file: $results_file! Aborting ...\n\n";
}
# end &get_results
##
# Analyze results file and change binary patterns to words
##
sub analyze_results {
open(RES_FILE,"./${results_file}");
$tmp_file = "./tmp.claspnet.gi.$$";
print "++ Writing RRF file: $rrf_file\n";
$/ = "#";
while (<RES_FILE>) {
s/#\n$//;
$i++;
if ($_ =~ /\S+/) {
if ($i == 1) { print RRF_FILE; }
else {
@p = split(/\n/,$_);
for (@p) { chomp; }
#75 $input = "$p[1] $p[2] $p[3] $p[4] $p[5] $p[6] $p[7] $p[8]";
#60 $input = "$p[1] $p[2] $p[3] $p[4] $p[5] $p[6]";
$input = "$p[1] $p[2] $p[3] $p[4] $p[5] $p[6] $p[7] $p[8] $p[9]";
$input = $convert{$input} or
die "!! The string \"$input\" is not present in the vocabulary!\n";
#75 $desired = $p[9] . " " . $p[10];
#75 $real = $p[11] . " " . $p[12];
#60 $desired = $p[7] . " " . $p[8];
#60 $real = $p[9] . " " . $p[10];
$desired = $p[10] . " " . $p[11];
$real = $p[12] . " " . $p[13];
$des = &form($desired);
$rel = &form($real);
print RRF_FILE "#$p[0]\t$input\n$des$rel";
}
}
}
close(RES_FILE);
close(TMP_FILE);
}
} # end &analyze_results
##
# Restructure long lines
##
sub form {
my($in) = $_[0];
@o = split(' ',$in);
$o1 =
sprintf("%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s%-8s\n",
$o[0], $o[1], $o[2], $o[3], $o[4], $o[5], $o[6], $o[7], $o[8], $o[9],
$o[10], $o[11], $o[12], $o[13], $o[14], $o[15], $o[16]);
return $o1;
} # end &form
##
# Get name for RRF file and print header
##
sub open_rrf {
$time = localtime;
print "Name for RRF file (will overwrite) [${def_file}.rrf]? ";
chop($rrf_file = <STDIN>);
unless ($rrf_file) { $rrf_file = $def_file . ".rrf" };
unless ($rrf_file =~ /\w+.rrf$/) { $rrf_file .= ".rrf" };
open(RRF_FILE,">./${rrf_file}");
print RRF_FILE "
+-+-+ Starting CLASPnet's SNNS English Results Analyser +-+-+
+-+-+ Copyright 1995-1996. Ezra Van Everbroeck +-+-+
Created with $0 on $time
Results file is $results_file
It used vocabulary file $voc_file
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
";
} # end &open_rrf
##
# Calculate errors percentage for varying tolerance
##
sub calc_errors {
my($do,$a,$do_output,$do_correct,$i);
$nul = 0;
$do = 0;
$do_output = 0; $do_correct = 0;
%perc_ok = (); %perc_nok = (); %error_on = (); %error_off = ();
$tolerance = 0; $tolerance_neg = 0;
# input separator back to normal to avoid surprises
$/ = "\n";
# get name for the file
print "Name for error summary file (will overwrite) [${def_file}.err]? ";
chop($err_file = <STDIN>);
unless ($err_file) { $err_file = $def_file . ".err" };
unless ($err_file =~ /\w+.err$/) { $err_file .= ".err" };
print "++ Writing error summary file: $err_file\n";
open(RRF_FILE,"${rrf_file}");
while (<RRF_FILE>) {
chomp;
$a = 0;
if ($do_output == 1) {
@output = split(' ');
#print "@output \n";
foreach(@tolerances) {
$tolerance = $_;
if ($_ != 0) {
$tolerance_neg = ($_ * -1);
} else {
$tolerance_neg = 0;
}
for ( $i=0 ; $i<$len_output ; $i++) {
if (($correct[$i] - $output[$i]) > $tolerance) {
++$error_on{"field",($i+1),"_",$tolerance};
}
if (($correct[$i] - $output[$i]) < $tolerance_neg) {
++$error_off{"field",($i+1),"_",$tolerance_neg};
}
}
}
# combine ID tags
&check_id;
$do_output = 0;
@output = (); @correct = ();
}
if ($do_correct == 1) {
@correct = split(' ');
#print "@correct \n";
foreach(@correct) {
++$a;
if ($_ == 1) {
++$output{"field",$a};
#print %output;
#print "\n";
}
}
$do_output = 1;
$do_correct = 0;
}
if ($_ =~ /#/) { $do_correct = 1; ++$num_pat; };
}
close(RRF_FILE);
# find percentages
foreach(@tolerances) {
$tolerance = $_;
if ($_ != 0) {
$tolerance_neg = ($_ * -1);
} else {
$tolerance_neg = 0;
}
for ( $i=0 ; $i<$len_output ; $i++ ) {
$perc_ok{"field",($i+1),"_",$tolerance} =
int(100 * (($num_pat -
($error_on{"field",($i+1),"_",$tolerance} +
$error_off{"field",($i+1),"_",$tolerance_neg})
) / $num_pat ));
# avoid divide by zero error
if (($output{"field",($i+1)} * 100) == 0) {
$output{"field",($i+1)} = 0.0000001;
}
$perc_nok{"field",($i+1),"_",$tolerance} =
int(100 * ($error_on{"field",($i+1),"_",$tolerance} +
$error_off{"field",($i+1),"_",$tolerance_neg})
/ ($output{"field",($i+1)}));
# avoid insanely large numbers
if ($perc_nok{"field",($i+1),"_",$tolerance} > 10000) {
$perc_nok{"field",($i+1),"_",$tolerance} = 100;
}
$perc_on{"field",($i+1),"_",$tolerance} =
int(100 * ($error_on{"field",($i+1),"_",$tolerance} /
$output{"field",($i+1)}));
$perc_off{"field",($i+1),"_",$tolerance_neg} =
int(100 * ($error_off{"field",($i+1),"_",$tolerance_neg} /
($num_pat - $output{"field",($i+1)})));
# restore zero
if ($output{"field",($i+1)} < 1) {
$output{"field",($i+1)} = 0;
}
}
}
# find average percentages for clause types
foreach(@tolerances) {
$tolerance = $_;
$avg = 0;
for ( $i=7 ; $i<($len_output -3) ; $i++) {
$avg += $perc_ok{"field",($i+1),"_",$tolerance};
}
$perc_ok_avg{"avg",$tolerance} =
int($avg / 7);
}
} # end &calc_errors
##
# Check ID tags: combine all three before judging
##
sub check_id {
my($i,$tol,$diff1,$diff2,$diff3);
$tol = .25;
$full_id = "$correct[3] $correct[4] $correct[5]";
++$ids{"$full_id"};
$diff1 = ($correct[3] - $output[3]);
$diff2 = ($correct[4] - $output[4]);
$diff3 = ($correct[5] - $output[5]);
if ($diff1 < 0) { $diff1 = $diff1 * -1; }
if ($diff2 < 0) { $diff2 = $diff2 * -1; }
if ($diff3 < 0) { $diff3 = $diff3 * -1; }
if ($diff1 < $tol && $diff2 < $tol && $diff3 < $tol) {
++$ids_gud{"$full_id"};
} else {
++$ids_bad{"$full_id"};
}
} # end &check_id
##
# Finale : write error summary file
##
sub finale {
$time = localtime;
open(ERR_FILE,">./${err_file}");
select(ERR_FILE);
print "Created with $0 on $time
Results file is $results_file and RRF file is $rrf_file
It used vocabulary file $voc_file
";
print "
****************************************************************
T: 0.0 0.1 0.2 0.3 0.4 0.5
****************************************************************";
for ( $i=0 ; $i<$len_output ; $i++) {
print "\n*** $name{$i+1} ($output{\"field\",$i+1}):";
print "\nOn: ";
foreach(@tolerances) {
$tolerance = $_;
if ($error_on{"field",($i+1),"_",$tolerance}) {
printf("%10s",$error_on{"field",($i+1),"_",$tolerance});
} else {
printf("%10s",$nul);
}
}
print "\nMis:";
foreach(@tolerances) {
$tolerance = $_;
if ($perc_on{"field",($i+1),"_",$tolerance}) {
printf("%9s%%",$perc_on{"field",($i+1),"_",$tolerance});
} else {
printf("%9s%%",$nul);
}
}
print "\nOff:";
foreach(@tolerances) {
$tolerance = $_;
if ($_ != 0) {
$tolerance_neg = ($_ * -1);
} else {
$tolerance_neg = 0;
}
if ($error_off{"field",($i+1),"_",$tolerance_neg}) {
printf("%10s",$error_off{"field",($i+1),"_",$tolerance_neg});
} else {
printf("%10s",$nul);
}
}
print "\nSpu:";
foreach(@tolerances) {
$tolerance = $_;
if ($_ != 0) {
$tolerance_neg = ($_ * -1);
} else {
$tolerance_neg = 0;
}
if ($perc_off{"field",($i+1),"_",$tolerance_neg}) {
printf("%9s%%",$perc_off{"field",($i+1),"_",$tolerance_neg});
} else {
printf("%9s%%",$nul);
}
}
print "\nBad:";
foreach(@tolerances) {
$tolerance = $_;
printf("%9.f%%",$perc_nok{"field",($i+1),"_",$tolerance});
}
print "\nGud:";
foreach(@tolerances) {
$tolerance = $_;
printf("%9.f%%",$perc_ok{"field",($i+1),"_",$tolerance});
}
print "\n";
}
print "\nCST:";
foreach(@tolerances) {
$tolerance = $_;
printf("%10.f",$perc_ok_avg{"avg",$tolerance});
}
print "
****************************************************************
Number of patterns: $num_pat
****************************************************************
";
%dec2bin = ( "0" => "0 0 0", "1" => "1 0 0", "2" => "0 1 0",
"3" => "1 1 0", "4" => "0 0 1", "5" => "1 0 1",
"6" => "0 1 1", "7" => "1 1 1");
%bin2dec = ( "0 0 0" => "0", "1 0 0" => "1", "0 1 0" => "2",
"1 1 0" => "3", "0 0 1" => "4", "1 0 1" => "5",
"0 1 1" => "6", "1 1 1" => "7");
for (keys %ids) { push(@ids,$bin2dec{$_}) }
for (sort @ids) {
$bin = $dec2bin{$_};
$perc_gud = int(100 * ($ids_gud{"$bin"} / $ids{"$bin"}));
$perc_bad = 100 - $perc_gud;
$id_out = sprintf("Level %s (%5.f): Gud: %3.f%% (%5.f) - Bad: %3.f%% (%5.f)",
$_, $ids{$bin}, ${perc_gud}, $ids_gud{$bin}, ${perc_bad}, $ids_bad{$bin});
print "$id_out \n";
}
close(ERR_FILE);
print STDOUT "++ Done.\n\n";
} # end &finale