Annotation of loncom/html/htmlarea/plugins/SpellChecker/spell-check-logic.cgi, revision 1.1

1.1     ! www         1: #! /usr/bin/perl -w
        !             2: 
        !             3: # Spell Checker Plugin for HTMLArea-3.0
        !             4: # Implementation by Mihai Bazon.  Sponsored by www.americanbible.org
        !             5: #
        !             6: # htmlArea v3.0 - Copyright (c) 2002 interactivetools.com, inc.
        !             7: # This notice MUST stay intact for use (see license.txt).
        !             8: #
        !             9: # A free WYSIWYG editor replacement for <textarea> fields.
        !            10: # For full source code and docs, visit http://www.interactivetools.com/
        !            11: #
        !            12: # Version 3.0 developed by Mihai Bazon for InteractiveTools.
        !            13: #	     http://students.infoiasi.ro/~mishoo
        !            14: #
        !            15: # $Id: spell-check-logic.cgi,v 1.2 2003/08/10 15:56:35 mishoo Exp $
        !            16: 
        !            17: use strict;
        !            18: use utf8;
        !            19: use Encode;
        !            20: use Text::Aspell;
        !            21: use HTML::Parser;
        !            22: use HTML::Entities;
        !            23: use CGI;
        !            24: 
        !            25: my $debug = 0;
        !            26: 
        !            27: open (DEBUG, '>:encoding(UTF-8)', '> /tmp/spell-check-debug.log') if $debug;
        !            28: 
        !            29: # use Data::Dumper; # for debug only
        !            30: 
        !            31: my $speller = new Text::Aspell;
        !            32: my $cgi = new CGI;
        !            33: 
        !            34: # FIXME: report a nice error...
        !            35: die "Can't create speller!" unless $speller;
        !            36: 
        !            37: # add configurable option for this
        !            38: my $dict = $cgi->param('dictionary') || 'en_US';
        !            39: $speller->set_option('lang', $dict);
        !            40: 
        !            41: # ultra, fast, normal, bad-spellers
        !            42: # bad-spellers seems to cause segmentation fault
        !            43: $speller->set_option('sug-mode', 'ultra');
        !            44: 
        !            45: my @replacements = ();
        !            46: 
        !            47: sub text_handler {
        !            48:     my ($offset, $length, $text, $is_cdata) = @_;
        !            49:     if ($is_cdata or $text =~ /^\s*$/) {
        !            50:         return 0;
        !            51:     }
        !            52:     # print STDERR "*** OFFSET: $offset, LENGTH: $length, $text\n";
        !            53:     $text = decode_entities($text);
        !            54:     $text =~ s/&#([0-9]+);/chr($1)/eg;
        !            55:     $text =~ s/&#x([0-9a-fA-F]+);/chr(hex $1)/eg;
        !            56:     my $repl = spellcheck($text);
        !            57:     if ($repl) {
        !            58:         push(@replacements, [ $offset, $length, $repl ]);
        !            59:     }
        !            60: }
        !            61: 
        !            62: my $p = HTML::Parser->new
        !            63:   (api_version => 3,
        !            64:    handlers => { start => [ sub {
        !            65:                                 my ($self, $tagname, $attrs) = @_;
        !            66:                                 # print STDERR "\033[1;31m parsing tag: $tagname\033[0m\n";
        !            67:                                 # following we skip words that have already been marked as "fixed".
        !            68:                                 if ($tagname eq "span" and $attrs->{class} =~ /HA-spellcheck-fixed/) {
        !            69:                                     $self->handler(text => undef);
        !            70:                                 }
        !            71:                             }, "self, tagname, attr"
        !            72:                           ],
        !            73:                  end => [ sub {
        !            74:                               my ($self, $tagname) = @_;
        !            75:                               # print STDERR "\033[1;32m END tag: $tagname\033[0m\n";
        !            76:                               $self->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
        !            77:                           }, "self, tagname"
        !            78:                         ]
        !            79:                }
        !            80:   );
        !            81: $p->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
        !            82: $p->case_sensitive(1);
        !            83: my $file_content = $cgi->param('content');
        !            84: 
        !            85: if ($debug) {
        !            86:     open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-before');
        !            87:     print FOO $file_content, "\n";
        !            88:     close(FOO);
        !            89: }
        !            90: 
        !            91: $p->parse($file_content);
        !            92: $p->eof();
        !            93: 
        !            94: foreach (reverse @replacements) {
        !            95:     substr($file_content, $_->[0], $_->[1], $_->[2]);
        !            96: }
        !            97: 
        !            98: # we output UTF-8
        !            99: binmode(STDOUT, ':encoding(UTF-8)'); # apparently, this sucks.
        !           100: print "Content-type: text/html; charset: utf-8\n\n";
        !           101: print qq^
        !           102: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
        !           103: <html>
        !           104: <head>
        !           105: <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
        !           106: <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
        !           107: </head>
        !           108: <body onload="window.parent.finishedSpellChecking();">^;
        !           109: 
        !           110: print $file_content;
        !           111: if ($cgi->param('init') eq '1') {
        !           112:     my @dicts = $speller->dictionary_info();
        !           113:     my $dictionaries = '';
        !           114:     foreach my $i (@dicts) {
        !           115:         $dictionaries .= ',' . $i->{name} unless $i->{jargon};
        !           116:     }
        !           117:     $dictionaries =~ s/^,//;
        !           118:     print qq^
        !           119: <div id="HA-spellcheck-dictionaries"
        !           120: >$dictionaries</div>
        !           121: ^;
        !           122: }
        !           123: 
        !           124: if ($debug) {
        !           125:     open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-after');
        !           126:     print FOO $file_content, "\n";
        !           127:     close(FOO);
        !           128: }
        !           129: 
        !           130: print '</body></html>';
        !           131: 
        !           132: # Perl is beautiful.
        !           133: sub spellcheck {
        !           134:     my $text = shift;
        !           135:     sub check {                 # called for each word in the text
        !           136:         # input is in UTF-8
        !           137:         my $U_word = shift;
        !           138:         my $word = encode($speller->get_option('encoding'), $U_word);
        !           139:         print DEBUG "*$U_word* ----> |$word|\n" if $debug;
        !           140:         if ($speller->check($word)) {
        !           141:             return $U_word;      # we return the word in UTF-8
        !           142:         } else {
        !           143:             # we should have suggestions; give them back to browser in UTF-8
        !           144:             my $suggestions = decode($speller->get_option('encoding'), join(',', $speller->suggest($word)));
        !           145:             my $ret = '<span class="HA-spellcheck-error">'.$U_word.'</span><span class="HA-spellcheck-suggestions">'.$suggestions.'</span>';
        !           146:             return $ret;
        !           147:         }
        !           148:     }
        !           149:     $text =~ s/([[:word:]']+)/check($1)/egs;
        !           150:     # $text =~ s/(\w+)/check($1)/egs;
        !           151: 
        !           152:     # the following is definitely what we want to use; too bad it sucks most.
        !           153:     # $text =~ s/(\p{IsWord}+)/check($1)/egs;
        !           154:     return $text;
        !           155: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>