--- loncom/interface/lonsearchcat.pm 2004/05/09 22:55:17 1.225 +++ loncom/interface/lonsearchcat.pm 2005/01/19 20:22:36 1.230.2.1 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Search Catalog # -# $Id: lonsearchcat.pm,v 1.225 2004/05/09 22:55:17 matthew Exp $ +# $Id: lonsearchcat.pm,v 1.230.2.1 2005/01/19 20:22:36 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -75,6 +75,7 @@ use Apache::lonhtmlcommon; use Apache::lonlocal; use LONCAPA::lonmetadata(); use HTML::Entities(); +use Parse::RecDescent; ###################################################################### ###################################################################### @@ -174,6 +175,7 @@ sub handler { '&launch='.$ENV{'form.launch'}. '&mode='.$ENV{'form.mode'}, text=>"Course and Catalog Search", + target=>'_top', bug=>'Searching',}); } else { &Apache::lonhtmlcommon::add_breadcrumb @@ -182,6 +184,7 @@ sub handler { '&launch='.$ENV{'form.launch'}. '&mode='.$ENV{'form.mode'}, text=>"Catalog Search", + target=>'_top', bug=>'Searching',}); } # @@ -335,6 +338,29 @@ END &course_search($r); } elsif(($ENV{'form.phase'} eq 'basic_search') || ($ENV{'form.phase'} eq 'adv_search')) { + # + # We are running a search, try to parse it + my ($query,$customquery,$customshow,$libraries) = + (undef,undef,undef,undef); + my $pretty_string; + if ($ENV{'form.phase'} eq 'basic_search') { + ($query,$pretty_string,$libraries) = + &parse_basic_search($r,$closebutton,$hidden_fields); + return OK if (! defined($query)); + &make_persistent({ basicexp => $ENV{'form.basicexp'}}, + $persistent_db_file); + } else { # Advanced search + ($query,$customquery,$customshow,$libraries,$pretty_string) + = &parse_advanced_search($r,$closebutton,$hidden_fields); + return OK if (! defined($query)); + } + &make_persistent({ query => $query, + customquery => $customquery, + customshow => $customshow, + libraries => $libraries, + pretty_string => $pretty_string }, + $persistent_db_file); + # # Set up table if (! defined(&create_results_table())) { my $errorstring=&Apache::lonmysql::get_error(); @@ -362,29 +388,12 @@ Unable to properly store search informat END return OK; } - # - # We are running a search - my ($query,$customquery,$customshow,$libraries) = - (undef,undef,undef,undef); - my $pretty_string; - if ($ENV{'form.phase'} eq 'basic_search') { - ($query,$pretty_string,$libraries) = - &parse_basic_search($r,$closebutton,$hidden_fields); - } else { # Advanced search - ($query,$customquery,$customshow,$libraries,$pretty_string) - = &parse_advanced_search($r,$closebutton,$hidden_fields); - return OK if (! defined($query)); - } - &make_persistent({ query => $query, - customquery => $customquery, - customshow => $customshow, - libraries => $libraries, - pretty_string => $pretty_string }, - $persistent_db_file); ## ## Print out the frames interface ## - &print_frames_interface($r); + if (defined($query)) { + &print_frames_interface($r); + } } return OK; } @@ -435,10 +444,18 @@ sub hidden_field { { # Scope the course search to avoid global variables # # Variables For course search -my %alreadyseen; my %hash; my $totalfound; +sub make_symb { + my ($id)=@_; + my ($mapid,$resid)=split(/\./,$id); + my $map=$hash{'map_id_'.$mapid}; + my $res=$hash{'src_'.$id}; + my $symb=&Apache::lonnet::encode_symb($map,$resid,$res); + return $symb; +} + sub course_search { my $r=shift; my $bodytag=&Apache::loncommon::bodytag('Course Search'); @@ -460,16 +477,18 @@ sub course_search { $bodytag.'
'.
- &Apache::lonhtmlcommon::textbox('basicexp',
- $ENV{'form.basicexp'},50).' '. + &Apache::lonhtmlcommon::textbox + ('basicexp', + &HTML::Entities::encode($ENV{'form.basicexp'},'<>&"'),50 + ). + ' '. ''.&searchhelp().''.' | '.
''.
' '. @@ -609,12 +645,6 @@ sub print_basic_search_form { ' | '.
'||||||
'. -# ''. -# $userelatedwords.(' 'x3). -# $onlysearchdomain.(' 'x2).$adv_search_link. -# ''. -# ' | |||||||
'.
''.
' 'Search',
'reset' => 'Reset',
'help' => 'Help');
@@ -774,7 +803,7 @@ ENDHEADER
$scrout.=''.
&titlefield(&mt('Copyright/Distribution')).' | '.
&Apache::lonmeta::selectbox('copyright',
- '',,
+ $ENV{'form.copyright'},
\&Apache::loncommon::copyrightdescription,
( undef,
&Apache::loncommon::copyrightids)
@@ -782,7 +811,7 @@ ENDHEADER
$scrout.=' | '.
&titlefield(&mt('Language')).' | '.
&Apache::lonmeta::selectbox('language',
- 'notset',,
+ $ENV{'form.language'},
\&Apache::loncommon::languagedescription,
('any',&Apache::loncommon::languageids)
).' | \n"; + my $pretty_search_string = ""; # Clean up fields for safety for my $field (@BasicFields, 'creationdatestart_month','creationdatestart_day', @@ -1204,7 +1233,6 @@ sub parse_advanced_search { 'lastrevisiondatestart_year','lastrevisiondateend_month', 'lastrevisiondateend_day','lastrevisiondateend_year') { $ENV{'form.'.$field}=~s/[^\w\/\s\(\)\=\-\"\']//g; - $ENV{'form.'.$field}=~s/(not\s*$|^\s*(and|or)|)//gi; } foreach ('mode','form','element') { # is this required? Hmmm. @@ -1249,21 +1277,29 @@ sub parse_advanced_search { # Evaluate logical expression AND/OR/NOT phrase fields. foreach my $field (@BasicFields) { next if (!defined($ENV{'form.'.$field}) || $ENV{'form.'.$field} eq ''); - foreach my $searchphrase(&process_phrase_input($ENV{'form.'.$field})){ - $pretty_search_string .= $font."$field contains ". - $searchphrase.""; + my ($error,$SQLQuery) = + &process_phrase_input($ENV{'form.'.$field}, + $ENV{'form.'.$field.'_related'},$field); + if (defined($error)) { + &output_unparsed_phrase_error($r,$closebutton,'phase=disp_adv', + $hidden_fields,$field); + return; + } else { + $pretty_search_string .= + $font.$field.': '.$ENV{'form.'.$field}; if ($ENV{'form.'.$field.'_related'}) { - my @New_Words; - ($searchphrase,@New_Words) = &related_version($searchphrase); - if (@New_Words) { - $pretty_search_string .= " with related words: ". - "@New_Words."; + my @Words = + &Apache::loncommon::get_related_words + ($ENV{'form.'.$field}); + if (@Words) { + $pretty_search_string.= ' with related words: '. + join(', ',@Words[0..4]); } else { - $pretty_search_string .= " with no related words."; + $pretty_search_string.= ' with related words.'; } } - $pretty_search_string .= " \n"; - push @queries,&build_SQL_query($field,$searchphrase); + $pretty_search_string .= ' '; + push (@queries,$SQLQuery); } } # @@ -1280,7 +1316,8 @@ sub parse_advanced_search { } } if (defined($searchphrase)) { - push @queries,&build_SQL_query('mime',$searchphrase); + my ($error,$SQLsearch) = &process_phrase_input($searchphrase,0,'mime'); + push @queries,$SQLsearch; $pretty_search_string .=$font.'mime contains '. $searchphrase.' '; } @@ -1312,7 +1349,7 @@ sub parse_advanced_search { push @queries,"(copyright like \"$ENV{'form.copyright'}\")"; $pretty_search_string.=$font."copyright = ". &Apache::loncommon::copyrightdescription($ENV{'form.copyright'}). - " \n"; + " \n"; } # # Statistics @@ -1400,11 +1437,11 @@ sub parse_advanced_search { $pretty_search_string .= $pretty_domains_string." \n"; # if (@queries) { - $query="SELET * FROM metadata WHERE ".join(" AND ",@queries); + $query="SELECT * FROM metadata WHERE (".join(") AND (",@queries).')'; } elsif ($customquery) { $query = ''; } -# &Apache::lonnet::logthis('query = '.$/.$query); + # &Apache::lonnet::logthis('query = '.$/.$query); return ($query,$customquery,$customshow,$libraries_to_query, $pretty_search_string); } @@ -1466,7 +1503,7 @@ sub parse_basic_search { # # Clean up fields for safety for my $field ('basicexp') { - $ENV{"form.$field"}=~s/[^\w\s\(\)\-]//g; + $ENV{"form.$field"}=~s/[^\w\s\'\"\!\(\)\-]//g; } foreach ('mode','form','element') { # is this required? Hmmm. @@ -1479,36 +1516,28 @@ sub parse_basic_search { # # Check to see if enough of a query is filled in my $search_string = $ENV{'form.basicexp'}; - $search_string =~ s/(not\s*$|^\s*(and|or)|)//gi; if (! &filled($search_string)) { &output_blank_field_error($r,$closebutton,'phase=disp_basic'); return OK; } - my $pretty_search_string=''; + my $pretty_search_string=$search_string; my @Queries; - my $concatarg=join(',', - ('title', 'author', 'subject', 'notes', 'abstract', - 'keywords')); - foreach my $search (&process_phrase_input($search_string)){ - if ($ENV{'form.related'}) { - $pretty_search_string .= ' and ' if ($pretty_search_string ne ''); - $pretty_search_string .= ''.$search.''; - my @New_Words; - ($search,@New_Words) = &related_version($search); - next if (! $search); - if (@New_Words) { - $pretty_search_string .= - " with related words: @New_Words"; - } - } else { - $pretty_search_string .= ' and ' if ($pretty_search_string ne ''); - $pretty_search_string .= ''.$search.''; - } - # - # Build SQL query string based on form page - push(@Queries, - &build_SQL_query('concat_ws(" ",'.$concatarg.')',$search)); + my $searchfield = 'concat_ws(" ",'.join(',', + ('title','author','subject', + 'notes','abstract','keywords') + ).')'; + my ($error,$SQLQuery) = &process_phrase_input($search_string, + $ENV{'form.related'}, + $searchfield); + if ($error) { + &output_unparsed_phrase_error($r,$closebutton,'phase=disp_basic', + '','basicexp'); + return; } + push(@Queries,$SQLQuery); + #foreach my $q (@Queries) { + # &Apache::lonnet::logthis(' '.$q); + #} my $final_query = 'SELECT * FROM metadata WHERE '.join(" AND ",@Queries); # if (defined($pretty_domains_string) && $pretty_domains_string ne '') { @@ -1516,40 +1545,189 @@ sub parse_basic_search { } $pretty_search_string .= " \n"; $pretty_search_string =~ s:^ and ::; -# &Apache::lonnet::logthis($final_query); + # &Apache::lonnet::logthis($final_query); return ($final_query,$pretty_search_string, $libraries_to_query); } + +############################################################### +############################################################### + +my @Phrases; + +sub concat { + my ($item) = @_; + my $results = ''; + foreach (@$item) { + if (ref($_) eq 'ARRAY') { + $results .= join(' ',@$_); + } + } + return $results; +} + sub process_phrase_input { - my ($phrase)=@_; - my @Phrases; - # &Apache::lonnet::logthis('phrase = :'.$phrase.':'); - my $in_quotes = 0; - my @Words = split(/\s+/,$phrase); - foreach my $word (@Words) { - $word =~ s/(\w+)\"(\w+)/$1$2/g; - if ($in_quotes) { - if ($word =~ s/(\")$//) { - $in_quotes = 0; + my ($phrase,$related,$field)=@_; + #&Apache::lonnet::logthis('phrase = :'.$phrase.':'); + my $grammar = <<'ENDGRAMMAR'; + searchphrase: + expression /^\Z/ { + # &Apache::lonsearchcat::print_item(\@item,0); + [@item]; + } + expression: + phrase(s) { + [@item]; + } + phrase: + orword { + [@item]; + } + | andword { + [@item]; + } + | minusword { + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + | word { + unshift(@::Phrases,$item[1]); + [@item]; + } + # + orword: + word 'OR' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | word 'or' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | minusword 'OR' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + | minusword 'or' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + andword: + word phrase { + unshift(@::Phrases,'AND'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | minusword phrase { + unshift(@::Phrases,'AND'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + # + minusword: + '-' word { + [$item[2],'NOT']; + } + word: + "'" term(s) "'" { + &Apache::lonsearchcat::concat(\@item); + } + | '"' term(s) '"' { + &Apache::lonsearchcat::concat(\@item); + } + | term { + $item[1]; + } + term: + /[\w\Q:!@#$%^&*()+_=|{}<>,.;\\\/?\E]+/ { + $item[1]; + } +ENDGRAMMAR + # + # The end result of parsing the phrase with the grammar is an array + # @::Phrases. + # $phrase = "gene splicing" or cat -> "gene splicing","OR","cat" + # $phrase = "genetic engineering" -dna -> + # "genetic engineering","AND","NOT","dna" + # $phrase = cat or dog -poodle -> "cat","OR","dog","AND","NOT","poodle" + undef(@::Phrases); + my $p = new Parse::RecDescent($grammar); + if (! defined($p->searchphrase($phrase))) { + &Apache::lonnet::logthis('lonsearchcat:unable to process:'.$phrase); + return 'Unable to process phrase '.$phrase; + } + # + # Go through the phrases and make sense of them. + # Apply modifiers NOT OR and AND to the phrases. + my @NewPhrases; + while(@::Phrases) { + my $phrase = shift(@::Phrases); + # &Apache::lonnet::logthis('phrase = '.$phrase); + my $phrasedata; + if ($phrase =~ /^(NOT|OR|AND)$/) { + if ($phrase eq 'OR') { + $phrasedata->{'or'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); + } elsif ($phrase eq 'AND') { + $phrasedata->{'and'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); } - if ($Phrases[-1] ne '') { - $Phrases[-1] .= ' '; + if ($phrase eq 'NOT') { + $phrasedata->{'negate'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); } - $Phrases[-1] .= $word; + } + $phrasedata->{'phrase'} = $phrase; + if ($related) { + my @NewWords; + (undef,@NewWords) = &related_version($phrasedata->{'phrase'}); + $phrasedata->{'related_words'} = \@NewWords; + } + push(@NewPhrases,$phrasedata); + } + # + # Actually build the sql query from the phrases + my $SQLQuery; + foreach my $phrase (@NewPhrases) { + my $query; + if ($phrase->{'negate'}) { + $query .= $field.' NOT LIKE "%'.$phrase->{'phrase'}.'%"'; } else { - if ($word =~ s/^(\")//) { - $in_quotes=1; + $query .= $field.' LIKE "%'.$phrase->{'phrase'}.'%"'; + } + foreach my $related (@{$phrase->{'related_words'}}) { + if ($phrase->{'negate'}) { + $query .= ' AND '.$field.' NOT LIKE "%'.$related.'%"'; + } else { + $query .= ' OR '.$field.' LIKE "%'.$related.'%"'; + } + } + if ($SQLQuery) { + if ($phrase->{'or'}) { + $SQLQuery .= ' OR ('.$query.')'; + } else { + $SQLQuery .= ' AND ('.$query.')'; } - push(@Phrases,$word); + } else { + $SQLQuery = '('.$query.')'; } } # - #foreach my $p (@Phrases) { - # &Apache::lonnet::logthis(' subphrase = '.$p); - #} + # &Apache::lonnet::logthis("SQLQuery = $SQLQuery"); # - return @Phrases; + return undef,$SQLQuery; } ###################################################################### @@ -1579,32 +1757,6 @@ sub related_version { return $result,sort(@Words); } -###################################################################### -###################################################################### - -=pod - -=item &build_SQL_query() - -Builds a SQL query string from a logical expression with AND/OR keywords -using Text::Query and &recursive_SQL_query_builder() - -=cut - -###################################################################### -###################################################################### -sub build_SQL_query { - my ($field_name,$logic_statement)=@_; - my $q=new Text::Query('abc', - -parse => 'Text::Query::ParseAdvanced', - -build => 'Text::Query::Build'); - $q->prepare($logic_statement); - my $matchexp=${$q}{'matchexp'}; chomp $matchexp; - &Apache::lonnet::logthis('matchexp = '.$matchexp); - my $sql_query=&recursive_SQL_query_build($field_name,$matchexp); - &Apache::lonnet::logthis('sql_query = '.$sql_query); - return $sql_query; -} ###################################################################### ###################################################################### @@ -1639,47 +1791,6 @@ sub build_custommetadata_query { return $matchexp; } -###################################################################### -###################################################################### - -=pod - -=item &recursive_SQL_query_build() - -Recursively constructs an SQL query. Takes as input $dkey and $pattern. - -=cut - -###################################################################### -###################################################################### -sub recursive_SQL_query_build { - my ($dkey,$pattern)=@_; - my @matches=($pattern=~/(\[[^\]|\[]*\])/g); - return $pattern unless @matches; - foreach my $match (@matches) { - $match=~/\[ (\w+)\s(.*) \]/; - my ($key,$value)=($1,$2); - my $replacement=''; - if ($key eq 'literal') { - $replacement="($dkey LIKE \"\%$value\%\")"; - } elsif (lc($key) eq 'not') { - $value=~s/LIKE/NOT LIKE/; -# $replacement="($dkey not like $value)"; - $replacement="$value"; - } elsif ($key eq 'and') { - $value=~/(.*[\"|\)]) ([|\(|\^].*)/; - $replacement="($1 AND $2)"; - } elsif ($key eq 'or') { - $value=~/(.*[\"|\)]) ([|\(|\^].*)/; - $replacement="($1 OR $2)"; - } - substr($pattern, - index($pattern,$match), - length($match), - $replacement); - } - &recursive_SQL_query_build($dkey,$pattern); -} ###################################################################### ###################################################################### @@ -1875,7 +1986,7 @@ sub print_sort_form { my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1). &Apache::lonhtmlcommon::breadcrumbs (undef,'Searching','Searching',undef,undef, - ! ($ENV{'form.catalogmode'} eq 'groupsearch')); + $ENV{'form.catalogmode'} ne 'groupsearch'); ## my %SortableFields=&Apache::lonlocal::texthash( @@ -2142,10 +2253,9 @@ results into MySQL. sub run_search { my ($r,$query,$customquery,$customshow,$serverlist,$pretty_string) = @_; my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1); - $bodytag.= - &Apache::lonhtmlcommon::breadcrumbs(undef,'Searching','Searching', - undef,undef,! $ENV{'form.launch'}); - + $bodytag.=&Apache::lonhtmlcommon::breadcrumbs + (undef,'Searching','Searching',undef,undef, + $ENV{'form.catalogmode'} ne 'groupsearch'); my $connection = $r->connection; # # Print run_search header @@ -2157,12 +2267,16 @@ $bodytag |