1: use strict;
2:
3: # ---------------------- Read word file
4:
5: open(IN,"rawkey.txt");
6: print "Opened file\n";
7: my $readdata='';
8: my %wordcount=();
9: my %wordindex=();
10: my %related=();
11: my $line;
12: while ($line=<IN>) {
13: $readdata.=$line;
14: }
15: close(IN);
16: undef $line;
17:
18: # ---------------------- Get all words and index them
19:
20: my $index=0;
21: map {
22: map {
23: my $word=$_;
24: if ($word) {
25: $word=~tr/A-Z/a-z/;
26: $wordcount{$word}++;
27: unless (defined $wordindex{$word}) {
28: $index++;
29: $wordindex{$word}=$index;
30: }
31: }
32: } split(/\W+/,$_);
33: } split(/(\n|\r)+/,$readdata);
34:
35: print "Built word index\n";
36:
37: # ---------------------- Find related words
38:
39: map {
40: my $line=$_;
41: if ($line) {
42: my @words=split(/\W+/,$line);
43: map {
44: my $word=$_;
45: if ($word) {
46: $word=~tr/A-Z/a-z/;
47: my $twordidx=$wordindex{$word};
48: if ($twordidx) {
49: my %alreadyrelated=();
50: if (defined $related{$twordidx}) {
51: map {
52: my ($idx,$count)=split(/\:/,$_);
53: $alreadyrelated{$idx}=$count;
54: } split(/\,/,$related{$twordidx});
55: }
56: map {
57: my $rword=$_;
58: $rword=~tr/A-Z/a-z/;
59: if (($rword) && ($rword ne $word)) {
60: my $rwordidx=$wordindex{$rword};
61: if (defined $alreadyrelated{$rwordidx}) {
62: $alreadyrelated{$rwordidx}++;
63: } else {
64: $alreadyrelated{$rwordidx}=1;
65: }
66: }
67: } @words;
68: $related{$twordidx}='';
69: map {
70: $related{$twordidx}.=$_.':'.$alreadyrelated{$_}.',';
71: } keys %alreadyrelated;
72: chop $related{$twordidx};
73: } else {
74: print "Warning! Unknown word: ".$word;
75: }
76: }
77: } @words;
78: }
79: } split(/(\n|\r)+/,$readdata);
80:
81: print "Built hash of related words\n";
82:
83: # ---------------------- Output
84:
85: open(OUT,">thesaurus.dat");
86: map {
87: my $wordidx=$wordindex{$_};
88: print OUT $_.'@'.$wordidx.'@'.$wordcount{$_}.'@'.$related{$wordidx}.
89: "\n";
90: } sort keys %wordindex;
91: close(OUT);
92:
93: print "Wrote thesaurus file\n";
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>