Annotation of loncom/html/htmlarea/plugins/SpellChecker/spell-check-logic.cgi, revision 1.2

1.1       www         1: #! /usr/bin/perl -w
                      2: 
                      3: # Spell Checker Plugin for HTMLArea-3.0
1.2     ! www         4: # Sponsored by www.americanbible.org
        !             5: # Implementation by Mihai Bazon, http://dynarch.com/mishoo/
1.1       www         6: #
1.2     ! www         7: # (c) dynarch.com 2003.
        !             8: # Distributed under the same terms as HTMLArea itself.
1.1       www         9: # This notice MUST stay intact for use (see license.txt).
                     10: #
1.2     ! www        11: # $Id: spell-check-logic.cgi,v 1.10 2004/01/31 13:47:05 mishoo Exp $
1.1       www        12: 
                     13: use strict;
                     14: use utf8;
                     15: use Encode;
                     16: use Text::Aspell;
1.2     ! www        17: use XML::DOM;
1.1       www        18: use CGI;
                     19: 
1.2     ! www        20: my $TIMER_start = undef;
        !            21: eval {
        !            22:     use Time::HiRes qw( gettimeofday tv_interval );
        !            23:     $TIMER_start = [gettimeofday()];
        !            24: };
        !            25: # use POSIX qw( locale_h );
1.1       www        26: 
1.2     ! www        27: binmode STDIN, ':utf8';
        !            28: binmode STDOUT, ':utf8';
1.1       www        29: 
1.2     ! www        30: my $debug = 0;
1.1       www        31: 
                     32: my $speller = new Text::Aspell;
                     33: my $cgi = new CGI;
                     34: 
1.2     ! www        35: my $total_words = 0;
        !            36: my $total_mispelled = 0;
        !            37: my $total_suggestions = 0;
        !            38: my $total_words_suggested = 0;
        !            39: 
1.1       www        40: # FIXME: report a nice error...
                     41: die "Can't create speller!" unless $speller;
                     42: 
1.2     ! www        43: my $dict = $cgi->param('dictionary') || $cgi->cookie('dictionary') || 'en';
        !            44: 
1.1       www        45: # add configurable option for this
                     46: $speller->set_option('lang', $dict);
1.2     ! www        47: $speller->set_option('encoding', 'UTF-8');
        !            48: #setlocale(LC_CTYPE, $dict);
1.1       www        49: 
                     50: # ultra, fast, normal, bad-spellers
                     51: # bad-spellers seems to cause segmentation fault
1.2     ! www        52: $speller->set_option('sug-mode', 'normal');
1.1       www        53: 
1.2     ! www        54: my %suggested_words = ();
        !            55: keys %suggested_words = 128;
1.1       www        56: 
1.2     ! www        57: my $file_content = decode('UTF-8', $cgi->param('content'));
        !            58: $file_content = parse_with_dom($file_content);
1.1       www        59: 
1.2     ! www        60: my $ck_dictionary = $cgi->cookie(-name     => 'dictionary',
        !            61:                                  -value    => $dict,
        !            62:                                  -expires  => '+30d');
        !            63: 
        !            64: print $cgi->header(-type    => 'text/html; charset: utf-8',
        !            65:                    -cookie  => $ck_dictionary);
        !            66: 
        !            67: my $js_suggested_words = make_js_hash(\%suggested_words);
        !            68: my $js_spellcheck_info = make_js_hash_from_array
        !            69:   ([
        !            70:     [ 'Total words'           , $total_words ],
        !            71:     [ 'Mispelled words'       , $total_mispelled . ' in dictionary \"'.$dict.'\"' ],
        !            72:     [ 'Total suggestions'     , $total_suggestions ],
        !            73:     [ 'Total words suggested' , $total_words_suggested ],
        !            74:     [ 'Spell-checked in'      , defined $TIMER_start ? (tv_interval($TIMER_start) . ' seconds') : 'n/a' ]
        !            75:    ]);
1.1       www        76: 
1.2     ! www        77: print qq^<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
1.1       www        78: <html>
                     79: <head>
                     80: <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
                     81: <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
1.2     ! www        82: <script type="text/javascript">
        !            83:   var suggested_words = { $js_suggested_words };
        !            84:   var spellcheck_info = { $js_spellcheck_info }; </script>
1.1       www        85: </head>
                     86: <body onload="window.parent.finishedSpellChecking();">^;
                     87: 
                     88: print $file_content;
                     89: if ($cgi->param('init') eq '1') {
                     90:     my @dicts = $speller->dictionary_info();
                     91:     my $dictionaries = '';
                     92:     foreach my $i (@dicts) {
1.2     ! www        93:         next if $i->{jargon};
        !            94:         my $name = $i->{name};
        !            95:         if ($name eq $dict) {
        !            96:             $name = '@'.$name;
        !            97:         }
        !            98:         $dictionaries .= ',' . $name;
1.1       www        99:     }
                    100:     $dictionaries =~ s/^,//;
1.2     ! www       101:     print qq^<div id="HA-spellcheck-dictionaries">$dictionaries</div>^;
1.1       www       102: }
                    103: 
                    104: print '</body></html>';
                    105: 
                    106: # Perl is beautiful.
                    107: sub spellcheck {
1.2     ! www       108:     my $node = shift;
        !           109:     my $doc = $node->getOwnerDocument;
        !           110:     my $check = sub {                 # called for each word in the text
1.1       www       111:         # input is in UTF-8
1.2     ! www       112:         my $word = shift;
        !           113:         my $already_suggested = defined $suggested_words{$word};
        !           114:         ++$total_words;
        !           115:         if (!$already_suggested && $speller->check($word)) {
        !           116:             return undef;
1.1       www       117:         } else {
                    118:             # we should have suggestions; give them back to browser in UTF-8
1.2     ! www       119:             ++$total_mispelled;
        !           120:             if (!$already_suggested) {
        !           121:                 # compute suggestions for this word
        !           122:                 my @suggestions = $speller->suggest($word);
        !           123:                 my $suggestions = decode($speller->get_option('encoding'), join(',', @suggestions));
        !           124:                 $suggested_words{$word} = $suggestions;
        !           125:                 ++$total_suggestions;
        !           126:                 $total_words_suggested += scalar @suggestions;
        !           127:             }
        !           128:             # HA-spellcheck-error
        !           129:             my $err = $doc->createElement('span');
        !           130:             $err->setAttribute('class', 'HA-spellcheck-error');
        !           131:             my $tmp = $doc->createTextNode;
        !           132:             $tmp->setNodeValue($word);
        !           133:             $err->appendChild($tmp);
        !           134:             return $err;
        !           135:         }
        !           136:     };
        !           137:     while ($node->getNodeValue =~ /([\p{IsWord}']+)/) {
        !           138:         my $word = $1;
        !           139:         my $before = $`;
        !           140:         my $after = $';
        !           141:         my $df = &$check($word);
        !           142:         if (!$df) {
        !           143:             $before .= $word;
        !           144:         }
        !           145:         {
        !           146:             my $parent = $node->getParentNode;
        !           147:             my $n1 = $doc->createTextNode;
        !           148:             $n1->setNodeValue($before);
        !           149:             $parent->insertBefore($n1, $node);
        !           150:             $parent->insertBefore($df, $node) if $df;
        !           151:             $node->setNodeValue($after);
        !           152:         }
        !           153:     }
        !           154: };
        !           155: 
        !           156: sub check_inner_text {
        !           157:     my $node = shift;
        !           158:     my $text = '';
        !           159:     for (my $i = $node->getFirstChild; defined $i; $i = $i->getNextSibling) {
        !           160:         if ($i->getNodeType == TEXT_NODE) {
        !           161:             spellcheck($i);
        !           162:         }
        !           163:     }
        !           164: };
        !           165: 
        !           166: sub parse_with_dom {
        !           167:     my ($text) = @_;
        !           168:     $text = '<spellchecker>'.$text.'</spellchecker>';
        !           169: 
        !           170:     my $parser = new XML::DOM::Parser;
        !           171:     if ($debug) {
        !           172:         open(FOO, '>:utf8', '/tmp/foo');
        !           173:         print FOO $text;
        !           174:         close FOO;
        !           175:     }
        !           176:     my $doc = $parser->parse($text);
        !           177:     my $nodes = $doc->getElementsByTagName('*');
        !           178:     my $n = $nodes->getLength;
        !           179: 
        !           180:     for (my $i = 0; $i < $n; ++$i) {
        !           181:         my $node = $nodes->item($i);
        !           182:         if ($node->getNodeType == ELEMENT_NODE) {
        !           183:             check_inner_text($node);
1.1       www       184:         }
                    185:     }
                    186: 
1.2     ! www       187:     my $ret = $doc->toString;
        !           188:     $ret =~ s{<spellchecker>(.*)</spellchecker>}{$1}sg;
        !           189:     return $ret;
        !           190: };
        !           191: 
        !           192: sub make_js_hash {
        !           193:     my ($hash) = @_;
        !           194:     my $js_hash = '';
        !           195:     while (my ($key, $val) = each %$hash) {
        !           196:         $js_hash .= ',' if $js_hash;
        !           197:         $js_hash .= '"'.$key.'":"'.$val.'"';
        !           198:     }
        !           199:     return $js_hash;
        !           200: };
        !           201: 
        !           202: sub make_js_hash_from_array {
        !           203:     my ($array) = @_;
        !           204:     my $js_hash = '';
        !           205:     foreach my $i (@$array) {
        !           206:         $js_hash .= ',' if $js_hash;
        !           207:         $js_hash .= '"'.$i->[0].'":"'.$i->[1].'"';
        !           208:     }
        !           209:     return $js_hash;
        !           210: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>