version 1.12, 2002/03/06 22:47:45
|
version 1.15, 2003/02/03 18:03:52
|
Line 1
|
Line 1
|
LON-CAPA perl modules |
LON-CAPA perl modules |
Scott Harrison |
|
May 28, 2001 |
May 28, 2001 |
October 4, 2001 |
October 4, 2001 |
October 20, 2001 |
October 20, 2001 |
Line 16 Not ordinarily on a 6.2 or 7.1 system
|
Line 15 Not ordinarily on a 6.2 or 7.1 system
|
|
|
----------------------------------------------- Digest |
----------------------------------------------- Digest |
***NEED TO HAVE DIGEST::MD5, etc IN HERE*** |
***NEED TO HAVE DIGEST::MD5, etc IN HERE*** |
|
http://www.cpan.org/authors/id/GAAS/Digest-MD5-2.20.tar.gz |
|
|
----------------------------------------------- Math-FFT |
----------------------------------------------- Math-FFT |
http://www.linuxjar.com/CPAN/authors/id/R/RK/RKOBES/Math-FFT-0.25.tar.gz |
http://www.cpan.org/authors/id/R/RK/RKOBES/Math-FFT-0.25.tar.gz |
Randy Kobes [randy@theoryx5.uwinnipeg.ca] |
Randy Kobes [randy@theoryx5.uwinnipeg.ca] |
|
|
The Math::FFT module provides an interface to various |
The Math::FFT module provides an interface to various |
Line 205 Sean M. Burke [sburke@cpan.org]
|
Line 205 Sean M. Burke [sburke@cpan.org]
|
HTML/Parse.pm |
HTML/Parse.pm |
|
|
---------------------------------------------- HTML-Parser |
---------------------------------------------- HTML-Parser |
http://www.cpan.org/authors/id/G/GA/GAAS/HTML-Parser-3.25.tar.gz |
http://www.cpan.org/authors/id/G/GA/GAAS/HTML-Parser-3.26.tar.gz |
Gisle Aas [gisle@aas.no] |
Gisle Aas [gisle@aas.no] |
HTML/Parser.pm 1 |
HTML/Parser.pm 1 |
HTML/TokeParser.pm 1 |
HTML/TokeParser.pm 1 |
|
|
Need these patches applied: |
Need this patch applied: |
|
|
diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.1/hparser.c |
diff -u HTML-Parser-3.26/hparser.c HTML-Parser-3.26.simpleslashfix/hparser.c |
--- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001 |
--- HTML-Parser-3.26/hparser.c Sun Mar 17 15:07:57 2002 |
+++ HTML-Parser-3.25.1/hparser.c Wed Feb 20 13:23:34 2002 |
+++ HTML-Parser-3.26.simpleslashfix/hparser.c Fri Mar 22 13:23:17 2002 |
@@ -1094,14 +1094,21 @@ |
@@ -1101,14 +1101,21 @@ |
hctype_t tag_name_first, tag_name_char; |
hctype_t tag_name_first, tag_name_char; |
hctype_t attr_name_first, attr_name_char; |
hctype_t attr_name_first, attr_name_char; |
|
|
Line 228 diff -urN HTML-Parser-3.25/hparser.c HTM
|
Line 228 diff -urN HTML-Parser-3.25/hparser.c HTM
|
- tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; |
- tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; |
- attr_name_first = HCTYPE_NOT_SPACE_GT; |
- attr_name_first = HCTYPE_NOT_SPACE_GT; |
- attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; |
- attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; |
+ if (p_state->xml_mode) { |
+ if (p_state->xml_mode) { |
+ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT; |
+ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_SLASH_GT; |
+ attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT; |
+ attr_name_first = HCTYPE_NOT_SPACE_SLASH_GT; |
+ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; |
+ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; |
+ } |
+ } |
+ else { |
+ else { |
+ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; |
+ tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT; |
+ attr_name_first = HCTYPE_NOT_SPACE_GT; |
+ attr_name_first = HCTYPE_NOT_SPACE_GT; |
+ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; |
+ attr_name_char = HCTYPE_NOT_SPACE_EQ_GT; |
+ } |
+ } |
} |
} |
|
|
s += 2; |
s += 2; |
@@ -1158,8 +1165,11 @@ |
@@ -1165,8 +1172,11 @@ |
else { |
else { |
char *word_start = s; |
char *word_start = s; |
while (s < end && isHNOT_SPACE_GT(*s)) { |
while (s < end && isHNOT_SPACE_GT(*s)) { |
- if (p_state->xml_mode && *s == '/') |
- if (p_state->xml_mode && *s == '/') |
- break; |
- break; |
+ if (p_state->xml_mode && *s == '/') { |
+ if (p_state->xml_mode && *s == '/') { |
+ /* look ahead to see if the tag ends */ |
+ /* look ahead to see if the tag ends */ |
+ if ((s+1)==end || *(s+1)=='>') |
+ if ((s+1)==end || *(s+1)=='>') |
+ break; |
+ break; |
+ } |
+ } |
s++; |
s++; |
} |
} |
if (s == end) |
if (s == end) |
|
|
diff -urN HTML-Parser-3.25/Parser.pm HTML-Parser-3.25.2/Parser.pm |
|
--- HTML-Parser-3.25/Parser.pm Fri May 11 13:24:09 2001 |
|
+++ HTML-Parser-3.25.2/Parser.pm Wed Mar 6 16:47:46 2002 |
|
@@ -427,6 +427,11 @@ |
|
There are currently no events associated with the marked section |
|
markup, but the text can be returned as C<skipped_text>. |
|
|
|
+=item $p->encoded_entities( [$bool] ) |
|
+ |
|
+By default, attr and @attr decode general enitites for attribute values. |
|
+This turns off that behavior. |
|
+ |
|
=back |
|
|
|
As markup and text is recognized, handlers are invoked. The following |
|
diff -urN HTML-Parser-3.25/Parser.xs HTML-Parser-3.25.2/Parser.xs |
|
--- HTML-Parser-3.25/Parser.xs Thu May 10 15:27:28 2001 |
|
+++ HTML-Parser-3.25.2/Parser.xs Wed Mar 6 16:48:56 2002 |
|
@@ -297,6 +297,7 @@ |
|
HTML::Parser::xml_mode = 3 |
|
HTML::Parser::unbroken_text = 4 |
|
HTML::Parser::marked_sections = 5 |
|
+ HTML::Parser::encoded_entities = 6 |
|
PREINIT: |
|
bool *attr; |
|
CODE: |
|
@@ -311,6 +312,7 @@ |
|
#else |
|
croak("marked sections not supported"); break; |
|
#endif |
|
+ case 6: attr = &pstate->encoded_entities; break; |
|
default: |
|
croak("Unknown boolean attribute (%d)", ix); |
|
} |
|
diff -urN HTML-Parser-3.25/hparser.c HTML-Parser-3.25.2/hparser.c |
|
--- HTML-Parser-3.25/hparser.c Thu May 10 15:27:28 2001 |
|
+++ HTML-Parser-3.25.2/hparser.c Wed Mar 6 16:44:47 2002 |
|
@@ -398,7 +398,8 @@ |
|
beg++; len -= 2; |
|
} |
|
attrval = newSVpvn(beg, len); |
|
- decode_entities(aTHX_ attrval, p_state->entity2char); |
|
+ if (!p_state->encoded_entities) |
|
+ decode_entities(aTHX_ attrval, p_state->entity2char); |
|
} |
|
else { /* boolean */ |
|
if (p_state->bool_attr_val) |
|
diff -urN HTML-Parser-3.25/hparser.h HTML-Parser-3.25.2/hparser.h |
|
--- HTML-Parser-3.25/hparser.h Tue May 8 13:03:27 2001 |
|
+++ HTML-Parser-3.25.2/hparser.h Wed Mar 6 16:48:18 2002 |
|
@@ -99,6 +99,7 @@ |
|
bool strict_names; |
|
bool xml_mode; |
|
bool unbroken_text; |
|
+ bool encoded_entities; |
|
|
|
/* other configuration stuff */ |
|
SV* bool_attr_val; |
|
diff -urN HTML-Parser-3.25/t/encoded-entities.t HTML-Parser-3.25.2/t/encoded-entities.t |
|
--- HTML-Parser-3.25/t/encoded-entities.t Wed Dec 31 19:00:00 1969 |
|
+++ HTML-Parser-3.25.2/t/encoded-entities.t Wed Mar 6 17:13:53 2002 |
|
@@ -0,0 +1,32 @@ |
|
+use strict; |
|
+print "1..2\n"; |
|
+ |
|
+use HTML::Parser (); |
|
+my $p = HTML::Parser->new(); |
|
+$p->encoded_entities(1); |
|
+ |
|
+my $text = ""; |
|
+$p->handler(start => |
|
+ sub { |
|
+ my($tag, $attr) = @_; |
|
+ $text .= "S[$tag"; |
|
+ for my $k (sort keys %$attr) { |
|
+ my $v = $attr->{$k}; |
|
+ $text .= " $k=$v"; |
|
+ } |
|
+ $text .= "]"; |
|
+ }, "tagname,attr"); |
|
+ |
|
+my $html = <<'EOT'; |
|
+<tag arg="&amp;&lt;&gt;"> |

+EOT |

+ |

+$p->parse($html)->eof; |

+ |

+print "not " unless $text eq 'S[tag arg=&amp;&lt;&gt;]'; print "ok 1\n"; |
|
+ |
|
+$text = ""; |
|
+$p->encoded_entities(0); |
|
+$p->parse($html)->eof; |
|
+ |
|
+print "not " unless $text eq 'S[tag arg=&<>]'; print "ok 2\n"; |
|
|
|
---------------------------------------------- IO-stringy |
---------------------------------------------- IO-stringy |
http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz |
http://www.cpan.org/authors/id/E/ER/ERYQ/IO-stringy-2.108.tar.gz |
(needed by MIME-tools) |
(needed by MIME-tools) |