--- loncom/homework/lonr.pm	2009/04/17 00:34:51	1.1
+++ loncom/homework/lonr.pm	2009/08/12 15:30:16	1.7
@@ -1,7 +1,7 @@
 # The LearningOnline Network with CAPA
 # Interface routines to R CAS
 #
-# $Id: lonr.pm,v 1.1 2009/04/17 00:34:51 www Exp $
+# $Id: lonr.pm,v 1.7 2009/08/12 15:30:16 www Exp $
 #
 # Copyright Michigan State University Board of Trustees
 #
@@ -34,6 +34,173 @@ use Apache::lonnet;
 use Apache::response();
 use LONCAPA;
 
+### Rreturn (Tie::IxHash::Easy) and runserializedscript (Data::Dumper) need the libraries below to be installed and loaded!
+
+###use Tie::IxHash::Easy; # autoties all subhashes to keep index order
+###use Data::Dumper;  # used to output hash contents
+
+my $errormsg = '';  # last parsing error recorded by Rcroak; '' means none
+
+# Rcroak: for use with R-error messages.  It only stores the message in
+# $errormsg (and returns it); it does not die.
+sub Rcroak {
+   my ($message) = @_;
+   return $errormsg = $message;
+}
+
+#
+# Rpeel takes a string of serialized values from R (i:/d:/s:/a: units),
+# peels off the first syntactically complete unit (number, string or array),
+# and returns a list (first unit, remainder).  Problems are recorded through
+# Rcroak; an array whose braces cannot be balanced yields ('', '').
+#
+sub Rpeel {
+        my $x = $_[0];  # the string containing the serialized R object(s)
+        if ($x =~ /^((?:i|d):(.+?);)(.*)$/) {
+                return ($1, $+);  # x starts with a number; $+ is the last group, i.e. the remainder
+        }
+        elsif ($x =~ /^s:(\d+):/) {
+                my $n = $1;  # x starts with a string of length n
+                if ($x =~ /^(s:\d+:\"(.{$n})\";)(.*)$/) {
+                        return ($1, $+);  # x starts with a valid string
+                } else {
+                        &Rcroak('invalid string detected');
+                }
+        }
+        elsif ($x =~ /^a:/) {
+                # x starts with an array -- find its closing brace (braces inside strings are not skipped)
+                my $i = index $x, '{', 0;  # position of first opening brace
+                if ($i < 0) {
+                        &Rcroak('array with no opening brace');
+                        return ('', '');  # nothing to balance; do not go on to scan for a stray '}'
+                }
+                my $open = 1;  # counts open braces
+                my $j = index $x, '}', $i; # position of first closing brace
+                $i = index $x, '{', $i + 1; # position of next opening brace (if any)
+                my $pos = -1;  # position of final closing brace
+                do {
+                        if (($i < $j) && ($i > 0)) {
+                                # encounter another opening brace before next closing brace
+                                $open++;
+                                $i = index $x, '{', $i + 1;  # find the next opening brace
+                        } elsif ($j > 0) {
+                                # next brace encountered is a closing brace
+                                $open--;
+                                $pos = $j;
+                                $j = index $x, '}', $j + 1;
+                        } else {
+                                &Rcroak('unmatched left brace');
+                                return ('', '');  # Rcroak does not die: leave here, or $open never reaches 0
+                        }
+                } until ($open == 0);
+                # array runs from start to $pos; whatever follows it is the remainder
+                return (substr($x, 0, $pos + 1), substr($x, $pos + 1));
+        } else {
+                &Rcroak('unrecognized R value');
+        }
+}
+# --- end Rpeel ---
+
+#
+# Rreturn accepts a string containing a serialized R object and returns either
+# the object's value (if it is scalar) or a reference to a tied hash holding the
+# object's contents (nested arrays are stored as references to such references).
+# Null keys become 'resultNNN', NNN being the entry's 0-based index.  Malformed
+# input yields an error string instead.  The optional second argument is for
+# Rreturn's own recursive calls only: it keeps $errormsg from being cleared.
+sub Rreturn {
+    my ($x, $nested) = @_;  # serialized R object(s); true $nested marks a recursive call
+    $x=~s/^\"//;
+    $x=~s/\"$//;
+    $x=~s/\\\"/\"/g;
+    $errormsg='' unless ($nested);  # reset at top level only, so errors survive the recursion
+    if ($x =~ /^(?:i|d):(.+?);$/) {
+        return $1;  # return the value of the number
+    } elsif ($x =~ /^s:(\d+):\"(.*)\";$/) {
+        # string -- verify the length
+        if (length($2) == $1) {
+           return $2;  # return the string
+        } else {
+           return 'mismatch in string length';
+        }
+    } elsif ($x =~ /^a:(\d+):\{(.*)\}$/) {  # array
+        my $dim = $1;  # array size
+        $x = $2;  # array contents
+        tie(my %h,'Tie::IxHash::Easy'); # start a hash; needs Tie::IxHash::Easy to be loaded
+        keys(%h) = $dim; # allocate space for the hash
+        my ($key, $y);
+        for (my $i = 0; $i < $dim; $i++) {
+           ($y, $x) = &Rpeel($x);  # strip off the entry for the key
+           if ($y eq '') {
+              &Rcroak('ran out of keys') unless ($errormsg);  # keep a more specific Rpeel message
+              last;  # nothing left to parse
+           }
+           $key = &Rreturn($y,1);
+           if ($key eq '') {
+              $key = "result$i";  # correct null key
+           }
+           ($y, $x) = &Rpeel($x);  # strip off the value
+           if ($y eq '') {
+               &Rcroak('ran out of values') unless ($errormsg);  # keep a more specific Rpeel message
+               last;  # nothing left to parse
+           }
+           if ($y =~ /^a:/) {
+               $h{$key} = \&Rreturn($y,1);  # array value: store a reference to the returned hash reference
+           } else {
+               $h{$key} = &Rreturn($y,1);  # scalar value: store the entry in the hash
+           }
+        }
+        if ($errormsg) { return $errormsg; }
+        return \%h;  # return a reference to the hash
+    } else {
+        return 'Unrecognized output';
+    }
+}
+# --- end Rreturn ---
+
+sub Rentry {  # (hashref, key, key, ...): follow the keys down the structure and return the scalar reached
+    my $hash = shift;  # reference to the tied hash to search (e.g. one returned by Rreturn)
+    my $i;
+    if (ref($hash) ne 'HASH') {
+       return 'Argument to cas_hashref_entry is not a hash!';
+    }
+    while ($i = shift) {  # next key on the path; a false key ('' or 0) ends the walk
+       if (exists($hash->{$i})) {
+          $hash = $hash->{$i};
+       } else {
+          return undef;  # no such key at this level
+       }
+       if (ref($hash) eq 'REF') {
+          $hash = $$hash;  # dereference one layer (Rreturn stores nested arrays as a reference to a hash reference)
+       } elsif (ref($hash) ne 'HASH') {
+          return $hash;  # drilled down to a scalar
+       }
+    }
+}  # NOTE(review): when the keys run out while still at a hash, nothing is returned explicitly -- confirm intended
+
+sub Rarray {  # (hashref, key, key, ...): list of the values at that path, ordered by their 1-based integer keys
+    my $hash = shift;  # reference to the tied hash to search (e.g. one returned by Rreturn)
+    if (ref($hash) ne 'HASH') {
+       return 'Argument to cas_hashref_array is not a hash!';
+    }
+    while (my $i = shift) {  # next key on the path; a false key ('' or 0) ends the walk
+       if (exists($hash->{$i})) {
+          $hash = $hash->{$i};
+       } else {
+          return undef;
+       }
+       if (ref($hash) eq 'REF') { $hash = $$hash; }  # dereference one layer
+       if (ref($hash) ne 'HASH') { return undef; }   # path ends on a non-hash: treat like a missing entry
+    }
+    my @returnarray=();
+    foreach my $key (keys(%{$hash})) {
+        # Only positive integer keys name a slot; 'resultN' or 0 would give subscript -1,
+        # which is fatal on an empty array and overwrites the last element otherwise.
+        unless ($key=~/^[1-9]\d*$/) { next; }
+        $returnarray[$key-1]=$$hash{$key};
+    }
+    return @returnarray;
+}
+
 sub connect {
    return IO::Socket::UNIX->new(Peer    => $Apache::lonnet::perlvar{'lonSockDir'}.'/rsock',
 				Type    => SOCK_STREAM,
@@ -61,11 +228,15 @@ sub rreply {
 sub blacklisted {
     my ($cmd)=@_;
     foreach my $forbidden (
-        '\? ','\?','%i\d+','%o','batch','block'
-       ,'compil','concat','describe','display2d','file','inchar'
-       ,'includ','lisp','load','outchar','plot','quit'
-       ,'read','reset','save','stin','stout','stringout'
-       ,'system','translat','ttyoff','with_stdout','writefile'
+        'read','write','scan','save','socket','connections',
+        'open','close',
+        'plot','X11','windows','quartz',
+        'postscript','pdf','png','jpeg',
+        'dev\.list','dev\.next','dev\.prev','dev\.set',
+        'dev\.off','dev\.copy','dev\.print','graphics',
+        'library','package','source','sink','objects',
+        'Sys\.','unlink','file\.','on\.exit','error',
+        'q\(\)'
      ) {
 	if ($cmd=~/$forbidden/s) { return 1; }
     } 
@@ -73,12 +244,9 @@ sub blacklisted {
 }
 
 sub r_allowed_libraries {
-   return (
-      "absimp","affine","atensor","atrig1","augmented_lagrangian","contrib_ode","ctensor","descriptive","diag",
-      "eigen","facexp","fft","fourie","functs","ggf","grobner","impdiff","ineq","interpol","itensor","lapack",
-      "lbfgs","lindstedt","linearalgebra","lsquares","makeOrders","mnewton","mchrpl","ntrig","orthopoly",
-      "quadpack","rducon","romberg","scifac","simplex","solve_rec","sqdnst","stats","sterling","sym","units",
-      "vect","zeilberger");
+   return ('boot','class','cluster','datasets','KernSmooth','MASS',  # returns the list of permitted R package names
+           'methods','mgcv','nlme','nnet','rpart','spatial',
+           'splines','stats','stats4','survival');  # presumably what r_is_allowed_library checks against -- TODO confirm
 }
 
 sub r_is_allowed_library {
@@ -105,7 +273,7 @@ sub runscript {
           }
        }
     }
-    foreach my $line (split(/\;/s,$fullscript)) {
+    foreach my $line (split(/[\n\r]+/s,$fullscript)) {
 	if ($line=~/\w/) { $reply=&rreply($socket,$line.";\n"); }
 	if ($reply=~/^Error\:/) { return $reply; }
     }
@@ -115,6 +283,35 @@ sub runscript {
     return $reply;
 }
 
+sub runserializedscript {  # returns (value or hash reference from Rreturn, Dumper text); on error both are the message
+    my ($socket,$fullscript,$libraries)=@_;
+    my $blocked='Error: blacklisted';
+    if (&blacklisted($fullscript)) { return($blocked,$blocked); }
+    my $reply;
+    $fullscript=~s/[\n\r]//gs;  # line breaks are dropped: statements must be separated by ';'
+    if ($libraries) {
+       foreach my $library (split(/\s*\,\s*/,$libraries)) {
+          unless ($library=~/\w/) { next; }
+          unless (&r_is_allowed_library($library)) { return($blocked,$blocked); }
+          $reply=&rreply($socket,'library('.$library.');'."\n");
+          if ($reply=~/^Error\:/) { return($reply,$reply); }
+       }
+    }
+    my @actuallines=grep { /\w/ } split(/\;/s,$fullscript);
+    # Without any statement there is nothing to serialize; do not send "phpSerialize();"
+    unless (@actuallines) { return('Error: empty script','Error: empty script'); }
+    my $lastline=pop(@actuallines);
+    foreach my $line (@actuallines) {
+        $reply=&rreply($socket,$line.";\n");
+        if ($reply=~/^Error\:/) { return($reply,$reply); }
+    }
+# The last line needs to be serialized
+    $reply=&rreply($socket,"phpSerialize($lastline);\n");
+    if ($reply=~/^Error\:/) { return($reply,$reply); }  # report an R error as is, like for the other lines
+    $reply=&Rreturn($reply);
+    return($reply,&Dumper($reply));  # NOTE(review): Dumper needs Data::Dumper loaded; its "use" line is commented out above
+}
+
 sub r_cas_formula_fix {
    my ($expression)=@_;
    return &Apache::response::implicit_multiplication($expression);
@@ -143,11 +340,17 @@ sub r_run {
 }
 
 sub r_eval {
-    my ($script,$libraries) = @_;
+    my ($script,$libraries,$hashflag) = @_;  # true $hashflag: run via runserializedscript and parse the last result
     my $socket=&connect();
-    my $reply=&runscript($socket,$script,$libraries);
+    my $reply;
+    my $dump='';  # dump text; only filled in by runserializedscript
+    if ($hashflag) {
+       ($reply,$dump)=&runserializedscript($socket,$script,$libraries);
+    } else {
+       $reply=&runscript($socket,$script,$libraries);
+    }
     &disconnect($socket);
-    return $reply;
+    return wantarray ? ($reply,$dump) : $reply;  # plain "return ($reply,$dump)" gives scalar-context callers $dump
 }