--- loncom/interface/lonsearchcat.pm 2004/05/05 17:29:06 1.223 +++ loncom/interface/lonsearchcat.pm 2005/02/02 21:27:28 1.230.2.2 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Search Catalog # -# $Id: lonsearchcat.pm,v 1.223 2004/05/05 17:29:06 matthew Exp $ +# $Id: lonsearchcat.pm,v 1.230.2.2 2005/02/02 21:27:28 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -75,6 +75,7 @@ use Apache::lonhtmlcommon; use Apache::lonlocal; use LONCAPA::lonmetadata(); use HTML::Entities(); +use Parse::RecDescent; ###################################################################### ###################################################################### @@ -174,6 +175,7 @@ sub handler { '&launch='.$ENV{'form.launch'}. '&mode='.$ENV{'form.mode'}, text=>"Course and Catalog Search", + target=>'_top', bug=>'Searching',}); } else { &Apache::lonhtmlcommon::add_breadcrumb @@ -182,6 +184,7 @@ sub handler { '&launch='.$ENV{'form.launch'}. '&mode='.$ENV{'form.mode'}, text=>"Catalog Search", + target=>'_top', bug=>'Searching',}); } # @@ -335,6 +338,29 @@ END &course_search($r); } elsif(($ENV{'form.phase'} eq 'basic_search') || ($ENV{'form.phase'} eq 'adv_search')) { + # + # We are running a search, try to parse it + my ($query,$customquery,$customshow,$libraries) = + (undef,undef,undef,undef); + my $pretty_string; + if ($ENV{'form.phase'} eq 'basic_search') { + ($query,$pretty_string,$libraries) = + &parse_basic_search($r,$closebutton,$hidden_fields); + return OK if (! defined($query)); + &make_persistent({ basicexp => $ENV{'form.basicexp'}}, + $persistent_db_file); + } else { # Advanced search + ($query,$customquery,$customshow,$libraries,$pretty_string) + = &parse_advanced_search($r,$closebutton,$hidden_fields); + return OK if (! defined($query)); + } + &make_persistent({ query => $query, + customquery => $customquery, + customshow => $customshow, + libraries => $libraries, + pretty_string => $pretty_string }, + $persistent_db_file); + # # Set up table if (! defined(&create_results_table())) { my $errorstring=&Apache::lonmysql::get_error(); @@ -362,29 +388,12 @@ Unable to properly store search informat END return OK; } - # - # We are running a search - my ($query,$customquery,$customshow,$libraries) = - (undef,undef,undef,undef); - my $pretty_string; - if ($ENV{'form.phase'} eq 'basic_search') { - ($query,$pretty_string,$libraries) = - &parse_basic_search($r,$closebutton,$hidden_fields); - } else { # Advanced search - ($query,$customquery,$customshow,$libraries,$pretty_string) - = &parse_advanced_search($r,$closebutton,$hidden_fields); - return OK if (! defined($query)); - } - &make_persistent({ query => $query, - customquery => $customquery, - customshow => $customshow, - libraries => $libraries, - pretty_string => $pretty_string }, - $persistent_db_file); ## ## Print out the frames interface ## - &print_frames_interface($r); + if (defined($query)) { + &print_frames_interface($r); + } } return OK; } @@ -439,12 +448,22 @@ my %alreadyseen; my %hash; my $totalfound; +sub make_symb { + my ($id)=@_; + my ($mapid,$resid)=split(/\./,$id); + my $map=$hash{'map_id_'.$mapid}; + my $res=$hash{'src_'.$id}; + my $symb=&Apache::lonnet::encode_symb($map,$resid,$res); + return $symb; +} + sub course_search { my $r=shift; my $bodytag=&Apache::loncommon::bodytag('Course Search'); my $pretty_search_string = ''.$ENV{'form.courseexp'}.''; my $search_string = $ENV{'form.courseexp'}; my @New_Words; + undef(%alreadyseen); if ($ENV{'form.crsrelated'}) { ($search_string,@New_Words) = &related_version($ENV{'form.courseexp'}); if (@New_Words) { @@ -460,16 +479,18 @@ sub course_search { $bodytag.'
'.
- &Apache::lonhtmlcommon::textbox('basicexp',
- $ENV{'form.basicexp'},50).' '. + &Apache::lonhtmlcommon::textbox + ('basicexp', + &HTML::Entities::encode($ENV{'form.basicexp'},'<>&"'),50 + ). + ' '. ''.&searchhelp().''.' | '.
''.
' '. @@ -609,12 +649,6 @@ sub print_basic_search_form { ' | '.
'||||||
'. -# ''. -# $userelatedwords.(' 'x3). -# $onlysearchdomain.(' 'x2).$adv_search_link. -# ''. -# ' | |||||||
'.
''.
' 'Search',
'reset' => 'Reset',
'help' => 'Help');
@@ -774,7 +807,7 @@ ENDHEADER
$scrout.=''.
&titlefield(&mt('Copyright/Distribution')).' | '.
&Apache::lonmeta::selectbox('copyright',
- '',,
+ $ENV{'form.copyright'},
\&Apache::loncommon::copyrightdescription,
( undef,
&Apache::loncommon::copyrightids)
@@ -782,7 +815,7 @@ ENDHEADER
$scrout.=' | '.
&titlefield(&mt('Language')).' | '.
&Apache::lonmeta::selectbox('language',
- 'notset',,
+ $ENV{'form.language'},
\&Apache::loncommon::languagedescription,
('any',&Apache::loncommon::languageids)
).' | \n"; + my $pretty_search_string = ""; # Clean up fields for safety for my $field (@BasicFields, 'creationdatestart_month','creationdatestart_day', @@ -1204,7 +1237,6 @@ sub parse_advanced_search { 'lastrevisiondatestart_year','lastrevisiondateend_month', 'lastrevisiondateend_day','lastrevisiondateend_year') { $ENV{'form.'.$field}=~s/[^\w\/\s\(\)\=\-\"\']//g; - $ENV{'form.'.$field}=~s/(not\s*$|^\s*(and|or)|)//gi; } foreach ('mode','form','element') { # is this required? Hmmm. @@ -1248,22 +1280,30 @@ sub parse_advanced_search { my $font = ''; # Evaluate logical expression AND/OR/NOT phrase fields. foreach my $field (@BasicFields) { - if ($ENV{'form.'.$field}) { - my $searchphrase = $ENV{'form.'.$field}; - $pretty_search_string .= $font."$field contains ". - $searchphrase.""; + next if (!defined($ENV{'form.'.$field}) || $ENV{'form.'.$field} eq ''); + my ($error,$SQLQuery) = + &process_phrase_input($ENV{'form.'.$field}, + $ENV{'form.'.$field.'_related'},$field); + if (defined($error)) { + &output_unparsed_phrase_error($r,$closebutton,'phase=disp_adv', + $hidden_fields,$field); + return; + } else { + $pretty_search_string .= + $font.$field.': '.$ENV{'form.'.$field}; if ($ENV{'form.'.$field.'_related'}) { - my @New_Words; - ($searchphrase,@New_Words) = &related_version($searchphrase); - if (@New_Words) { - $pretty_search_string .= " with related words: ". - "@New_Words."; + my @Words = + &Apache::loncommon::get_related_words + ($ENV{'form.'.$field}); + if (@Words) { + $pretty_search_string.= ' with related words: '. + join(', ',@Words[0..4]); } else { - $pretty_search_string .= " with no related words."; + $pretty_search_string.= ' with related words.'; } } - $pretty_search_string .= " \n"; - push @queries,&build_SQL_query($field,$searchphrase); + $pretty_search_string .= ' '; + push (@queries,$SQLQuery); } } # @@ -1280,7 +1320,8 @@ sub parse_advanced_search { } } if (defined($searchphrase)) { - push @queries,&build_SQL_query('mime',$searchphrase); + my ($error,$SQLsearch) = &process_phrase_input($searchphrase,0,'mime'); + push @queries,$SQLsearch; $pretty_search_string .=$font.'mime contains '. $searchphrase.' '; } @@ -1312,7 +1353,7 @@ sub parse_advanced_search { push @queries,"(copyright like \"$ENV{'form.copyright'}\")"; $pretty_search_string.=$font."copyright = ". &Apache::loncommon::copyrightdescription($ENV{'form.copyright'}). - " \n"; + " \n"; } # # Statistics @@ -1400,11 +1441,11 @@ sub parse_advanced_search { $pretty_search_string .= $pretty_domains_string." \n"; # if (@queries) { - $query="select * from metadata where ".join(" AND ",@queries); + $query="SELECT * FROM metadata WHERE (".join(") AND (",@queries).')'; } elsif ($customquery) { $query = ''; } -# &Apache::lonnet::logthis('query = '.$/.$query); + # &Apache::lonnet::logthis('query = '.$/.$query); return ($query,$customquery,$customshow,$libraries_to_query, $pretty_search_string); } @@ -1466,7 +1507,7 @@ sub parse_basic_search { # # Clean up fields for safety for my $field ('basicexp') { - $ENV{"form.$field"}=~s/[^\w\s\(\)\-]//g; + $ENV{"form.$field"}=~s/[^\w\s\'\"\!\(\)\-]//g; } foreach ('mode','form','element') { # is this required? Hmmm. @@ -1479,39 +1520,220 @@ sub parse_basic_search { # # Check to see if enough of a query is filled in my $search_string = $ENV{'form.basicexp'}; - $search_string =~ s/(not\s*$|^\s*(and|or)|)//gi; if (! &filled($search_string)) { &output_blank_field_error($r,$closebutton,'phase=disp_basic'); return OK; } - my $pretty_search_string = ''.$ENV{'form.basicexp'}.''; - if ($ENV{'form.related'}) { - my @New_Words; - ($search_string,@New_Words) = &related_version($ENV{'form.basicexp'}); - if (@New_Words) { - $pretty_search_string .= " with related words: @New_Words."; - } else { - $pretty_search_string .= " with no related words."; - } + my $pretty_search_string=$search_string; + my @Queries; + my $searchfield = 'concat_ws(" ",'.join(',', + ('title','author','subject', + 'notes','abstract','keywords') + ).')'; + my ($error,$SQLQuery) = &process_phrase_input($search_string, + $ENV{'form.related'}, + $searchfield); + if ($error) { + &output_unparsed_phrase_error($r,$closebutton,'phase=disp_basic', + '','basicexp'); + return; } + push(@Queries,$SQLQuery); + #foreach my $q (@Queries) { + # &Apache::lonnet::logthis(' '.$q); + #} + my $final_query = 'SELECT * FROM metadata WHERE '.join(" AND ",@Queries); # - # Build SQL query string based on form page - my $query=''; - my $concatarg=join(',', - ('title', 'author', 'subject', 'notes', 'abstract', - 'keywords')); - $concatarg='title' if $ENV{'form.titleonly'}; - $query=&build_SQL_query('concat_ws(" ",'.$concatarg.')',$search_string); if (defined($pretty_domains_string) && $pretty_domains_string ne '') { $pretty_search_string .= ' '.$pretty_domains_string; } $pretty_search_string .= " \n"; - my $final_query = 'SELECT * FROM metadata WHERE '.$query; + $pretty_search_string =~ s:^ and ::; # &Apache::lonnet::logthis($final_query); return ($final_query,$pretty_search_string, $libraries_to_query); } + +############################################################### +############################################################### + +my @Phrases; + +sub concat { + my ($item) = @_; + my $results = ''; + foreach (@$item) { + if (ref($_) eq 'ARRAY') { + $results .= join(' ',@$_); + } + } + return $results; +} + +sub process_phrase_input { + my ($phrase,$related,$field)=@_; + #&Apache::lonnet::logthis('phrase = :'.$phrase.':'); + my $grammar = <<'ENDGRAMMAR'; + searchphrase: + expression /^\Z/ { + # &Apache::lonsearchcat::print_item(\@item,0); + [@item]; + } + expression: + phrase(s) { + [@item]; + } + phrase: + orword { + [@item]; + } + | andword { + [@item]; + } + | minusword { + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + | word { + unshift(@::Phrases,$item[1]); + [@item]; + } + # + orword: + word 'OR' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | word 'or' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | minusword 'OR' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + | minusword 'or' phrase { + unshift(@::Phrases,'OR'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + andword: + word phrase { + unshift(@::Phrases,'AND'); + unshift(@::Phrases,$item[1]); + [@item]; + } + | minusword phrase { + unshift(@::Phrases,'AND'); + unshift(@::Phrases,$item[1]->[0]); + unshift(@::Phrases,$item[1]->[1]); + [@item]; + } + # + minusword: + '-' word { + [$item[2],'NOT']; + } + word: + "'" term(s) "'" { + &Apache::lonsearchcat::concat(\@item); + } + | '"' term(s) '"' { + &Apache::lonsearchcat::concat(\@item); + } + | term { + $item[1]; + } + term: + /[\w\Q:!@#$%^&*()+_=|{}<>,.;\\\/?\E]+/ { + $item[1]; + } +ENDGRAMMAR + # + # The end result of parsing the phrase with the grammar is an array + # @::Phrases. + # $phrase = "gene splicing" or cat -> "gene splicing","OR","cat" + # $phrase = "genetic engineering" -dna -> + # "genetic engineering","AND","NOT","dna" + # $phrase = cat or dog -poodle -> "cat","OR","dog","AND","NOT","poodle" + undef(@::Phrases); + my $p = new Parse::RecDescent($grammar); + if (! defined($p->searchphrase($phrase))) { + &Apache::lonnet::logthis('lonsearchcat:unable to process:'.$phrase); + return 'Unable to process phrase '.$phrase; + } + # + # Go through the phrases and make sense of them. + # Apply modifiers NOT OR and AND to the phrases. + my @NewPhrases; + while(@::Phrases) { + my $phrase = shift(@::Phrases); + # &Apache::lonnet::logthis('phrase = '.$phrase); + my $phrasedata; + if ($phrase =~ /^(NOT|OR|AND)$/) { + if ($phrase eq 'OR') { + $phrasedata->{'or'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); + } elsif ($phrase eq 'AND') { + $phrasedata->{'and'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); + } + if ($phrase eq 'NOT') { + $phrasedata->{'negate'}++; + if (! @::Phrases) { $phrasedata = undef; last; } + $phrase = shift(@::Phrases); + } + } + $phrasedata->{'phrase'} = $phrase; + if ($related) { + my @NewWords; + (undef,@NewWords) = &related_version($phrasedata->{'phrase'}); + $phrasedata->{'related_words'} = \@NewWords; + } + push(@NewPhrases,$phrasedata); + } + # + # Actually build the sql query from the phrases + my $SQLQuery; + foreach my $phrase (@NewPhrases) { + my $query; + if ($phrase->{'negate'}) { + $query .= $field.' NOT LIKE "%'.$phrase->{'phrase'}.'%"'; + } else { + $query .= $field.' LIKE "%'.$phrase->{'phrase'}.'%"'; + } + foreach my $related (@{$phrase->{'related_words'}}) { + if ($phrase->{'negate'}) { + $query .= ' AND '.$field.' NOT LIKE "%'.$related.'%"'; + } else { + $query .= ' OR '.$field.' LIKE "%'.$related.'%"'; + } + } + if ($SQLQuery) { + if ($phrase->{'or'}) { + $SQLQuery .= ' OR ('.$query.')'; + } else { + $SQLQuery .= ' AND ('.$query.')'; + } + } else { + $SQLQuery = '('.$query.')'; + } + } + # + # &Apache::lonnet::logthis("SQLQuery = $SQLQuery"); + # + return undef,$SQLQuery; +} + ###################################################################### ###################################################################### @@ -1530,45 +1752,15 @@ Note: Using this twice on a string is pr ###################################################################### ###################################################################### sub related_version { - my $search_string = shift; - my $result = $search_string; - my %New_Words = (); - while ($search_string =~ /(\w+)/cg) { - my $word = $1; - next if (lc($word) =~ /\b(or|and|not)\b/); - my @Words = &Apache::loncommon::get_related_words($word); - @Words = ($#Words>4? @Words[0..4] : @Words); - foreach (@Words) { $New_Words{$_}++;} - my $replacement = join " OR ", ($word,@Words); - $result =~ s/(\b)$word(\b)/$1($replacement)$2/g; - } - return $result,sort(keys(%New_Words)); + my ($word) = @_; + return (undef) if (lc($word) =~ /\b(or|and|not)\b/); + my @Words = &Apache::loncommon::get_related_words($word); + # Only use 4 related words + @Words = ($#Words>4? @Words[0..4] : @Words); + my $result = join " OR ", ($word,@Words); + return $result,sort(@Words); } -###################################################################### -###################################################################### - -=pod - -=item &build_SQL_query() - -Builds a SQL query string from a logical expression with AND/OR keywords -using Text::Query and &recursive_SQL_query_builder() - -=cut - -###################################################################### -###################################################################### -sub build_SQL_query { - my ($field_name,$logic_statement)=@_; - my $q=new Text::Query('abc', - -parse => 'Text::Query::ParseAdvanced', - -build => 'Text::Query::Build'); - $q->prepare($logic_statement); - my $matchexp=${$q}{'matchexp'}; chomp $matchexp; - my $sql_query=&recursive_SQL_query_build($field_name,$matchexp); - return $sql_query; -} ###################################################################### ###################################################################### @@ -1603,47 +1795,6 @@ sub build_custommetadata_query { return $matchexp; } -###################################################################### -###################################################################### - -=pod - -=item &recursive_SQL_query_build() - -Recursively constructs an SQL query. Takes as input $dkey and $pattern. - -=cut - -###################################################################### -###################################################################### -sub recursive_SQL_query_build { - my ($dkey,$pattern)=@_; - my @matches=($pattern=~/(\[[^\]|\[]*\])/g); - return $pattern unless @matches; - foreach my $match (@matches) { - $match=~/\[ (\w+)\s(.*) \]/; - my ($key,$value)=($1,$2); - my $replacement=''; - if ($key eq 'literal') { - $replacement="($dkey LIKE \"\%$value\%\")"; - } elsif (lc($key) eq 'not') { - $value=~s/LIKE/NOT LIKE/; -# $replacement="($dkey not like $value)"; - $replacement="$value"; - } elsif ($key eq 'and') { - $value=~/(.*[\"|\)]) ([|\(|\^].*)/; - $replacement="($1 AND $2)"; - } elsif ($key eq 'or') { - $value=~/(.*[\"|\)]) ([|\(|\^].*)/; - $replacement="($1 OR $2)"; - } - substr($pattern, - index($pattern,$match), - length($match), - $replacement); - } - &recursive_SQL_query_build($dkey,$pattern); -} ###################################################################### ###################################################################### @@ -1836,7 +1987,11 @@ a link to change the search query. ###################################################################### sub print_sort_form { my ($r,$pretty_query_string) = @_; - my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1); + my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1). + &Apache::lonhtmlcommon::breadcrumbs + (undef,'Searching','Searching',undef,undef, + $ENV{'form.catalogmode'} ne 'groupsearch'); + ## my %SortableFields=&Apache::lonlocal::texthash( id => 'Default', @@ -2102,6 +2257,9 @@ results into MySQL. sub run_search { my ($r,$query,$customquery,$customshow,$serverlist,$pretty_string) = @_; my $bodytag=&Apache::loncommon::bodytag(undef,undef,undef,1); + $bodytag.=&Apache::lonhtmlcommon::breadcrumbs + (undef,'Searching','Searching',undef,undef, + $ENV{'form.catalogmode'} ne 'groupsearch'); my $connection = $r->connection; # # Print run_search header @@ -2113,12 +2271,16 @@ $bodytag |