Annotation of loncom/publisher/testthesaurus.pl, revision 1.1
1.1 ! www 1: use strict;
! 2:
! 3:
! 4: my @related=();
! 5: my @word=();
! 6: my @count=();
! 7: my %index=();
! 8: my $totalcount=0;
! 9: my $fuzzy=2;
! 10:
! 11: # --------------------- Read thesaurus
! 12:
! 13: open(IN,"thesaurus.dat");
! 14:
! 15: while (my $entry=<IN>) {
! 16: my ($tword,$tindex,$tcount,$trelated)=split(/\@/,$entry);
! 17: $index{$tword}=$tindex;
! 18: $word[$tindex]=$tword;
! 19: $count[$tindex]=$tcount;
! 20: $totalcount+=$tcount;
! 21: $related[$tindex]=$trelated;
! 22: }
! 23:
! 24: close(IN);
! 25:
! 26: my $avecount=$totalcount/$#count;
! 27:
! 28: print "Read thesaurus\n";
! 29: print "Average count: $avecount\n";
! 30:
! 31: # --------------------- Test it
! 32: print "Input word [,fuzzyness]: ";
! 33: while (my $input=<STDIN>) {
! 34: chomp($input);
! 35: my ($newword,$newfuzzy)=split(/\,/,$input);
! 36: if ($newfuzzy) { $fuzzy=$newfuzzy; }
! 37: my $threshold=0.1/$fuzzy;
! 38: $newword=~s/\W//g;
! 39: $newword=~tr/A-Z/a-z/;
! 40: print "\n\n\n--- $newword (Fuzzy: $fuzzy) ---\n";
! 41: my $tindex=$index{$newword};
! 42: if ($tindex) {
! 43: if ($count[$tindex]>$avecount) {
! 44: print "\nKEYWORD\n\n";
! 45: } else {
! 46: print "\nNot keyword\n\n";
! 47: }
! 48: my %found=();
! 49: print "Related:\n";
! 50: map {
! 51: # - Related word found
! 52: my ($ridx,$rcount)=split(/\:/,$_);
! 53: # - Direct relation index
! 54: my $directrel=$rcount/$count[$tindex];
! 55: if ($directrel>$threshold) {
! 56: map {
! 57: my ($rridx,$rrcount)=split(/\:/,$_);
! 58: if ($rridx==$tindex) {
! 59: # - Determine reverse relation index
! 60: my $revrel=$rrcount/$count[$ridx];
! 61: # - Calculate full index
! 62: $found{$ridx}=$directrel*$revrel;
! 63: if ($found{$ridx}>$threshold) {
! 64: map {
! 65: my ($rrridx,$rrrcount)=split(/\:/,$_);
! 66: unless ($found{$rrridx}) {
! 67: my $revrevrel=$rrrcount/$count[$ridx];
! 68: if (
! 69: $directrel*$revrel*$revrevrel>$threshold
! 70: ) {
! 71: $found{$rrridx}=
! 72: $directrel*$revrel*$revrevrel;
! 73: }
! 74: }
! 75: } split(/\,/,$related[$ridx]);
! 76: }
! 77: }
! 78: } split(/\,/,$related[$ridx]);
! 79: }
! 80: } split(/\,/,$related[$tindex]);
! 81: # - Print results
! 82: map {
! 83: if ($found{$_}>$threshold) {
! 84: print ' '.$word[$_].' '.$found{$_}."\n";
! 85: }
! 86: } sort { $found{$b}<=>$found{$a} } keys %found;
! 87: } else {
! 88: print "\nNot found\n\n";
! 89: }
! 90: print "\nInput word: ";
! 91: }
! 92:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>