Annotation of loncom/html/htmlarea/plugins/SpellChecker/spell-check-logic.cgi, revision 1.2
1.1 www 1: #! /usr/bin/perl -w
2:
3: # Spell Checker Plugin for HTMLArea-3.0
1.2 ! www 4: # Sponsored by www.americanbible.org
! 5: # Implementation by Mihai Bazon, http://dynarch.com/mishoo/
1.1 www 6: #
1.2 ! www 7: # (c) dynarch.com 2003.
! 8: # Distributed under the same terms as HTMLArea itself.
1.1 www 9: # This notice MUST stay intact for use (see license.txt).
10: #
1.2 ! www 11: # $Id: spell-check-logic.cgi,v 1.10 2004/01/31 13:47:05 mishoo Exp $
1.1 www 12:
13: use strict;
14: use utf8;
15: use Encode;
16: use Text::Aspell;
1.2 ! www 17: use XML::DOM;
1.1 www 18: use CGI;
19:
1.2 ! www 20: my $TIMER_start = undef;
! 21: eval {
! 22: use Time::HiRes qw( gettimeofday tv_interval );
! 23: $TIMER_start = [gettimeofday()];
! 24: };
! 25: # use POSIX qw( locale_h );
1.1 www 26:
1.2 ! www 27: binmode STDIN, ':utf8';
! 28: binmode STDOUT, ':utf8';
1.1 www 29:
1.2 ! www 30: my $debug = 0;
1.1 www 31:
32: my $speller = new Text::Aspell;
33: my $cgi = new CGI;
34:
1.2 ! www 35: my $total_words = 0;
! 36: my $total_mispelled = 0;
! 37: my $total_suggestions = 0;
! 38: my $total_words_suggested = 0;
! 39:
1.1 www 40: # FIXME: report a nice error...
41: die "Can't create speller!" unless $speller;
42:
1.2 ! www 43: my $dict = $cgi->param('dictionary') || $cgi->cookie('dictionary') || 'en';
! 44:
1.1 www 45: # add configurable option for this
46: $speller->set_option('lang', $dict);
1.2 ! www 47: $speller->set_option('encoding', 'UTF-8');
! 48: #setlocale(LC_CTYPE, $dict);
1.1 www 49:
50: # ultra, fast, normal, bad-spellers
51: # bad-spellers seems to cause segmentation fault
1.2 ! www 52: $speller->set_option('sug-mode', 'normal');
1.1 www 53:
1.2 ! www 54: my %suggested_words = ();
! 55: keys %suggested_words = 128;
1.1 www 56:
1.2 ! www 57: my $file_content = decode('UTF-8', $cgi->param('content'));
! 58: $file_content = parse_with_dom($file_content);
1.1 www 59:
1.2 ! www 60: my $ck_dictionary = $cgi->cookie(-name => 'dictionary',
! 61: -value => $dict,
! 62: -expires => '+30d');
! 63:
! 64: print $cgi->header(-type => 'text/html; charset: utf-8',
! 65: -cookie => $ck_dictionary);
! 66:
! 67: my $js_suggested_words = make_js_hash(\%suggested_words);
! 68: my $js_spellcheck_info = make_js_hash_from_array
! 69: ([
! 70: [ 'Total words' , $total_words ],
! 71: [ 'Mispelled words' , $total_mispelled . ' in dictionary \"'.$dict.'\"' ],
! 72: [ 'Total suggestions' , $total_suggestions ],
! 73: [ 'Total words suggested' , $total_words_suggested ],
! 74: [ 'Spell-checked in' , defined $TIMER_start ? (tv_interval($TIMER_start) . ' seconds') : 'n/a' ]
! 75: ]);
1.1 www 76:
1.2 ! www 77: print qq^<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
1.1 www 78: <html>
79: <head>
80: <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
81: <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
1.2 ! www 82: <script type="text/javascript">
! 83: var suggested_words = { $js_suggested_words };
! 84: var spellcheck_info = { $js_spellcheck_info }; </script>
1.1 www 85: </head>
86: <body onload="window.parent.finishedSpellChecking();">^;
87:
88: print $file_content;
89: if ($cgi->param('init') eq '1') {
90: my @dicts = $speller->dictionary_info();
91: my $dictionaries = '';
92: foreach my $i (@dicts) {
1.2 ! www 93: next if $i->{jargon};
! 94: my $name = $i->{name};
! 95: if ($name eq $dict) {
! 96: $name = '@'.$name;
! 97: }
! 98: $dictionaries .= ',' . $name;
1.1 www 99: }
100: $dictionaries =~ s/^,//;
1.2 ! www 101: print qq^<div id="HA-spellcheck-dictionaries">$dictionaries</div>^;
1.1 www 102: }
103:
104: print '</body></html>';
105:
106: # Perl is beautiful.
107: sub spellcheck {
1.2 ! www 108: my $node = shift;
! 109: my $doc = $node->getOwnerDocument;
! 110: my $check = sub { # called for each word in the text
1.1 www 111: # input is in UTF-8
1.2 ! www 112: my $word = shift;
! 113: my $already_suggested = defined $suggested_words{$word};
! 114: ++$total_words;
! 115: if (!$already_suggested && $speller->check($word)) {
! 116: return undef;
1.1 www 117: } else {
118: # we should have suggestions; give them back to browser in UTF-8
1.2 ! www 119: ++$total_mispelled;
! 120: if (!$already_suggested) {
! 121: # compute suggestions for this word
! 122: my @suggestions = $speller->suggest($word);
! 123: my $suggestions = decode($speller->get_option('encoding'), join(',', @suggestions));
! 124: $suggested_words{$word} = $suggestions;
! 125: ++$total_suggestions;
! 126: $total_words_suggested += scalar @suggestions;
! 127: }
! 128: # HA-spellcheck-error
! 129: my $err = $doc->createElement('span');
! 130: $err->setAttribute('class', 'HA-spellcheck-error');
! 131: my $tmp = $doc->createTextNode;
! 132: $tmp->setNodeValue($word);
! 133: $err->appendChild($tmp);
! 134: return $err;
! 135: }
! 136: };
! 137: while ($node->getNodeValue =~ /([\p{IsWord}']+)/) {
! 138: my $word = $1;
! 139: my $before = $`;
! 140: my $after = $';
! 141: my $df = &$check($word);
! 142: if (!$df) {
! 143: $before .= $word;
! 144: }
! 145: {
! 146: my $parent = $node->getParentNode;
! 147: my $n1 = $doc->createTextNode;
! 148: $n1->setNodeValue($before);
! 149: $parent->insertBefore($n1, $node);
! 150: $parent->insertBefore($df, $node) if $df;
! 151: $node->setNodeValue($after);
! 152: }
! 153: }
! 154: };
! 155:
! 156: sub check_inner_text {
! 157: my $node = shift;
! 158: my $text = '';
! 159: for (my $i = $node->getFirstChild; defined $i; $i = $i->getNextSibling) {
! 160: if ($i->getNodeType == TEXT_NODE) {
! 161: spellcheck($i);
! 162: }
! 163: }
! 164: };
! 165:
! 166: sub parse_with_dom {
! 167: my ($text) = @_;
! 168: $text = '<spellchecker>'.$text.'</spellchecker>';
! 169:
! 170: my $parser = new XML::DOM::Parser;
! 171: if ($debug) {
! 172: open(FOO, '>:utf8', '/tmp/foo');
! 173: print FOO $text;
! 174: close FOO;
! 175: }
! 176: my $doc = $parser->parse($text);
! 177: my $nodes = $doc->getElementsByTagName('*');
! 178: my $n = $nodes->getLength;
! 179:
! 180: for (my $i = 0; $i < $n; ++$i) {
! 181: my $node = $nodes->item($i);
! 182: if ($node->getNodeType == ELEMENT_NODE) {
! 183: check_inner_text($node);
1.1 www 184: }
185: }
186:
1.2 ! www 187: my $ret = $doc->toString;
! 188: $ret =~ s{<spellchecker>(.*)</spellchecker>}{$1}sg;
! 189: return $ret;
! 190: };
! 191:
! 192: sub make_js_hash {
! 193: my ($hash) = @_;
! 194: my $js_hash = '';
! 195: while (my ($key, $val) = each %$hash) {
! 196: $js_hash .= ',' if $js_hash;
! 197: $js_hash .= '"'.$key.'":"'.$val.'"';
! 198: }
! 199: return $js_hash;
! 200: };
! 201:
! 202: sub make_js_hash_from_array {
! 203: my ($array) = @_;
! 204: my $js_hash = '';
! 205: foreach my $i (@$array) {
! 206: $js_hash .= ',' if $js_hash;
! 207: $js_hash .= '"'.$i->[0].'":"'.$i->[1].'"';
! 208: }
! 209: return $js_hash;
! 210: };
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>