Annotation of loncom/html/htmlarea/plugins/SpellChecker/spell-check-logic.cgi, revision 1.1
1.1 ! www 1: #! /usr/bin/perl -w
! 2:
! 3: # Spell Checker Plugin for HTMLArea-3.0
! 4: # Implementation by Mihai Bazon. Sponsored by www.americanbible.org
! 5: #
! 6: # htmlArea v3.0 - Copyright (c) 2002 interactivetools.com, inc.
! 7: # This notice MUST stay intact for use (see license.txt).
! 8: #
! 9: # A free WYSIWYG editor replacement for <textarea> fields.
! 10: # For full source code and docs, visit http://www.interactivetools.com/
! 11: #
! 12: # Version 3.0 developed by Mihai Bazon for InteractiveTools.
! 13: # http://students.infoiasi.ro/~mishoo
! 14: #
! 15: # $Id: spell-check-logic.cgi,v 1.2 2003/08/10 15:56:35 mishoo Exp $
! 16:
! 17: use strict;
! 18: use utf8;
! 19: use Encode;
! 20: use Text::Aspell;
! 21: use HTML::Parser;
! 22: use HTML::Entities;
! 23: use CGI;
! 24:
! 25: my $debug = 0;
! 26:
! 27: open (DEBUG, '>:encoding(UTF-8)', '> /tmp/spell-check-debug.log') if $debug;
! 28:
! 29: # use Data::Dumper; # for debug only
! 30:
! 31: my $speller = new Text::Aspell;
! 32: my $cgi = new CGI;
! 33:
! 34: # FIXME: report a nice error...
! 35: die "Can't create speller!" unless $speller;
! 36:
! 37: # add configurable option for this
! 38: my $dict = $cgi->param('dictionary') || 'en_US';
! 39: $speller->set_option('lang', $dict);
! 40:
! 41: # ultra, fast, normal, bad-spellers
! 42: # bad-spellers seems to cause segmentation fault
! 43: $speller->set_option('sug-mode', 'ultra');
! 44:
! 45: my @replacements = ();
! 46:
! 47: sub text_handler {
! 48: my ($offset, $length, $text, $is_cdata) = @_;
! 49: if ($is_cdata or $text =~ /^\s*$/) {
! 50: return 0;
! 51: }
! 52: # print STDERR "*** OFFSET: $offset, LENGTH: $length, $text\n";
! 53: $text = decode_entities($text);
! 54: $text =~ s/&#([0-9]+);/chr($1)/eg;
! 55: $text =~ s/&#x([0-9a-fA-F]+);/chr(hex $1)/eg;
! 56: my $repl = spellcheck($text);
! 57: if ($repl) {
! 58: push(@replacements, [ $offset, $length, $repl ]);
! 59: }
! 60: }
! 61:
! 62: my $p = HTML::Parser->new
! 63: (api_version => 3,
! 64: handlers => { start => [ sub {
! 65: my ($self, $tagname, $attrs) = @_;
! 66: # print STDERR "\033[1;31m parsing tag: $tagname\033[0m\n";
! 67: # following we skip words that have already been marked as "fixed".
! 68: if ($tagname eq "span" and $attrs->{class} =~ /HA-spellcheck-fixed/) {
! 69: $self->handler(text => undef);
! 70: }
! 71: }, "self, tagname, attr"
! 72: ],
! 73: end => [ sub {
! 74: my ($self, $tagname) = @_;
! 75: # print STDERR "\033[1;32m END tag: $tagname\033[0m\n";
! 76: $self->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
! 77: }, "self, tagname"
! 78: ]
! 79: }
! 80: );
! 81: $p->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
! 82: $p->case_sensitive(1);
! 83: my $file_content = $cgi->param('content');
! 84:
! 85: if ($debug) {
! 86: open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-before');
! 87: print FOO $file_content, "\n";
! 88: close(FOO);
! 89: }
! 90:
! 91: $p->parse($file_content);
! 92: $p->eof();
! 93:
! 94: foreach (reverse @replacements) {
! 95: substr($file_content, $_->[0], $_->[1], $_->[2]);
! 96: }
! 97:
! 98: # we output UTF-8
! 99: binmode(STDOUT, ':encoding(UTF-8)'); # apparently, this sucks.
! 100: print "Content-type: text/html; charset: utf-8\n\n";
! 101: print qq^
! 102: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
! 103: <html>
! 104: <head>
! 105: <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
! 106: <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
! 107: </head>
! 108: <body onload="window.parent.finishedSpellChecking();">^;
! 109:
! 110: print $file_content;
! 111: if ($cgi->param('init') eq '1') {
! 112: my @dicts = $speller->dictionary_info();
! 113: my $dictionaries = '';
! 114: foreach my $i (@dicts) {
! 115: $dictionaries .= ',' . $i->{name} unless $i->{jargon};
! 116: }
! 117: $dictionaries =~ s/^,//;
! 118: print qq^
! 119: <div id="HA-spellcheck-dictionaries"
! 120: >$dictionaries</div>
! 121: ^;
! 122: }
! 123:
! 124: if ($debug) {
! 125: open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-after');
! 126: print FOO $file_content, "\n";
! 127: close(FOO);
! 128: }
! 129:
! 130: print '</body></html>';
! 131:
! 132: # Perl is beautiful.
! 133: sub spellcheck {
! 134: my $text = shift;
! 135: sub check { # called for each word in the text
! 136: # input is in UTF-8
! 137: my $U_word = shift;
! 138: my $word = encode($speller->get_option('encoding'), $U_word);
! 139: print DEBUG "*$U_word* ----> |$word|\n" if $debug;
! 140: if ($speller->check($word)) {
! 141: return $U_word; # we return the word in UTF-8
! 142: } else {
! 143: # we should have suggestions; give them back to browser in UTF-8
! 144: my $suggestions = decode($speller->get_option('encoding'), join(',', $speller->suggest($word)));
! 145: my $ret = '<span class="HA-spellcheck-error">'.$U_word.'</span><span class="HA-spellcheck-suggestions">'.$suggestions.'</span>';
! 146: return $ret;
! 147: }
! 148: }
! 149: $text =~ s/([[:word:]']+)/check($1)/egs;
! 150: # $text =~ s/(\w+)/check($1)/egs;
! 151:
! 152: # the following is definitely what we want to use; too bad it sucks most.
! 153: # $text =~ s/(\p{IsWord}+)/check($1)/egs;
! 154: return $text;
! 155: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>