1: use strict;
2:
3:
4: my @related=();
5: my @word=();
6: my @count=();
7: my %index=();
8: my $totalcount=0;
9: my $fuzzy=2;
10:
11: # --------------------- Read thesaurus
12:
13: open(IN,"thesaurus.dat");
14:
15: while (my $entry=<IN>) {
16: my ($tword,$tindex,$tcount,$trelated)=split(/\@/,$entry);
17: $index{$tword}=$tindex;
18: $word[$tindex]=$tword;
19: $count[$tindex]=$tcount;
20: $totalcount+=$tcount;
21: $related[$tindex]=$trelated;
22: }
23:
24: close(IN);
25:
26: my $avecount=$totalcount/$#count;
27:
28: print "Read thesaurus\n";
29: print "Average count: $avecount\n";
30:
31: # --------------------- Test it
32: print "Input word [,fuzzyness]: ";
33: while (my $input=<STDIN>) {
34: chomp($input);
35: my ($newword,$newfuzzy)=split(/\,/,$input);
36: if ($newfuzzy) { $fuzzy=$newfuzzy; }
37: my $threshold=0.1/$fuzzy;
38: $newword=~s/\W//g;
39: $newword=~tr/A-Z/a-z/;
40: print "\n\n\n--- $newword (Fuzzy: $fuzzy) ---\n";
41: my $tindex=$index{$newword};
42: if ($tindex) {
43: if ($count[$tindex]>$avecount) {
44: print "\nKEYWORD\n\n";
45: } else {
46: print "\nNot keyword\n\n";
47: }
48: my %found=();
49: print "Related:\n";
50: map {
51: # - Related word found
52: my ($ridx,$rcount)=split(/\:/,$_);
53: # - Direct relation index
54: my $directrel=$rcount/$count[$tindex];
55: if ($directrel>$threshold) {
56: map {
57: my ($rridx,$rrcount)=split(/\:/,$_);
58: if ($rridx==$tindex) {
59: # - Determine reverse relation index
60: my $revrel=$rrcount/$count[$ridx];
61: # - Calculate full index
62: $found{$ridx}=$directrel*$revrel;
63: if ($found{$ridx}>$threshold) {
64: map {
65: my ($rrridx,$rrrcount)=split(/\:/,$_);
66: unless ($found{$rrridx}) {
67: my $revrevrel=$rrrcount/$count[$ridx];
68: if (
69: $directrel*$revrel*$revrevrel>$threshold
70: ) {
71: $found{$rrridx}=
72: $directrel*$revrel*$revrevrel;
73: }
74: }
75: } split(/\,/,$related[$ridx]);
76: }
77: }
78: } split(/\,/,$related[$ridx]);
79: }
80: } split(/\,/,$related[$tindex]);
81: # - Print results
82: map {
83: if ($found{$_}>$threshold) {
84: print ' '.$word[$_].' '.$found{$_}."\n";
85: }
86: } sort { $found{$b}<=>$found{$a} } keys %found;
87: } else {
88: print "\nNot found\n\n";
89: }
90: print "\nInput word: ";
91: }
92:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>