--- loncom/interface/statistics/lonproblemanalysis.pm 2004/11/02 20:45:41 1.102 +++ loncom/interface/statistics/lonproblemanalysis.pm 2004/11/10 16:56:39 1.103 @@ -1,6 +1,6 @@ # The LearningOnline Network with CAPA # -# $Id: lonproblemanalysis.pm,v 1.102 2004/11/02 20:45:41 matthew Exp $ +# $Id: lonproblemanalysis.pm,v 1.103 2004/11/10 16:56:39 matthew Exp $ # # Copyright Michigan State University Board of Trustees # @@ -152,17 +152,8 @@ sub BuildProblemAnalysisPage { $problem_data, \@Students); } elsif ($current_problem->{'resptype'} eq 'numerical') { - ## - ## analyze all responses of a problem at once - my $res = $current_problem->{'resource'}; - foreach my $partid (@{$res->{'parts'}}) { - $current_problem->{'part'} = $partid; - foreach my $respid (@{$res->{'partdata'}->{$partid}->{'ResponseIds'}}) { - $current_problem->{'respid'}=$respid; - &NumericalResponseAnalysis($r,$current_problem, - $problem_data,\@Students); - } - } + &numerical_response_analysis($r,$current_problem, + $problem_data,\@Students); } else { $r->print('

Analysis of '.$current_problem->{'resptype'}.' is not supported

'); } @@ -178,7 +169,6 @@ sub BuildProblemAnalysisPage { } } - ######################################################### ######################################################### ## @@ -186,80 +176,273 @@ sub BuildProblemAnalysisPage { ## ######################################################### ######################################################### -sub NumericalResponseAnalysis { - my ($r,$problem,$problem_data,$Students) = @_; +sub numerical_response_analysis { + my ($r,$problem,$problem_analysis,$students) = @_; my $c = $r->connection(); - my ($resource,$partid,$respid) = ($problem->{'resource'}, - $problem->{'part'}, - $problem->{'respid'}); # - if (scalar(@{$resource->{'parts'}})>1) { - if (@{$resource->{'partdata'}->{$partid}->{'ResponseIds'}}>1) { - $r->print('

'. - &mt('Part [_1], response [_2].',$partid,$respid). - '

'); - } else { - $r->print('

'. - &mt('Part [_1]',$partid,$respid). - '

'); - } - } elsif (@{$resource->{'partdata'}->{$partid}->{'ResponseIds'}}>1) { - $r->print('

'.&mt('Response [_1]',$respid).'

'); + if ($ENV{'form.AnalyzeOver'} !~ /^(tries|time)$/) { + $r->print('Bad request'); } # - my $analysis_html; - my $PerformanceData = &Apache::loncoursedata::get_response_data + my ($resource,$partid,$respid) = ($problem->{'resource'}, + $problem->{'part'}, + $problem->{'respid'}); + # Gather student data + my $response_data = &Apache::loncoursedata::get_response_data (\@Apache::lonstatistics::SelectedSections, $Apache::lonstatistics::enrollment_status, $resource->{'symb'},$respid); - if (! defined($PerformanceData) || - ref($PerformanceData) ne 'ARRAY' ) { - $analysis_html = '

'. - &mt('There is no submission data for this resource'). - '

'; + # + for (my $plot_num = 1;$plot_num<=$ENV{'form.NumPlots'};$plot_num++) { + my $restriction_function; + $restriction_function = sub {($_[0]->{'tries'} == $plot_num?1:0)}; + # + my ($correct,$intervals,$answers) = + &numerical_response_determine_intervals($r,$resource,$partid, + $respid,$students); + if ($c->aborted()) { return; }; + # + my $responses = &classify_response_data($response_data, + $correct, + $restriction_function); + my $student_count = $responses->{'_students'}; + my $correct_count = $responses->{'_correct'}; + my $total_count = $responses->{'_count'}; + my $max = $responses->{'_max'}; + # + my $analysis_html = '

'.&mt('Attempt [_1]',$plot_num).'

'; + $analysis_html.= &numerical_one_dimensional_plot($r,600,150, + scalar(@$students), + $answers); + $analysis_html.= ''. + ''. + ''. + ''.$/.'
'. + &numerical_plot_percent($r,$responses).''. + &numerical_plot_differences($r,$responses).'
'; $r->print($analysis_html); - return; } # - # This next call causes all the waiting around that people complain about - &Apache::lonstathelpers::GetStudentAnswers($r,$problem,$Students, - 'Statistics', - 'stats_status'); - return if ($c->aborted()); - # - # Collate the data - my %Data; - foreach my $student (@$Students) { - my $answer = $student->{'answer'}; - $Data{$answer}++; - } - my @Labels = sort {$a <=> $b } keys(%Data); - my @PlotData = @Data{@Labels}; - # - my $width = 500; - my $height = 100; - my $plot = &one_dimensional_plot($r,500,100,scalar(@$Students), - \@Labels,\@PlotData); - - $r->print($plot); return; } -sub one_dimensional_plot { - my ($r,$width,$height,$N,$Labels,$Data)=@_; +sub numerical_plot_percent { + my ($r,$responses) = @_; # - # Compute data -> image scaling factors - my $min = $Labels->[0]; - my $max = $Labels->[-1]; - if ($max == $min) { - $max =$min+1; + my $total = $responses->{'_count'}; + return '' if ($total == 0); + my $minbin = 0.5; + while (my ($interval,$submissions) = each(%$responses)) { + next if ($interval =~ /^_/); + my ($ans,$ans_low,$ans_high) = split(" ",$interval); + my $low_percent = abs(($ans-$ans_low)/$ans); + my $high_percent = abs(($ans_high-$ans)/$ans); + if ($minbin > $high_percent) { $minbin = $high_percent; } + if ($minbin > $low_percent) { $minbin = $low_percent; } + } + # + my @bins; + if ($minbin < 1) { + @bins = (0.1, ".5",1,1.5,2,2.5,3,4,5,10,20,50,100); + } elsif ($minbin < 2) { + @bins = (0.5,1,1.5,2,2.5,3,4,5,10,20,50,100); + } elsif ($minbin < 5) { + @bins = (1,2,3,4,5,10,25,50,75,100,200); + } elsif ($minbin < 10) { + @bins = (2,4,6,8,10,12,15,20,25,30,50,75,100,200); + } else { + @bins = (5,10,15,20,25,30,50,75,100,200); + } + my @labels = (1..scalar(@bins)); + # + my @correct; + my @incorrect; + while (my ($interval,$submissions) = each(%$responses)) { + next if ($interval =~ /^_/); + my ($ans,$ans_low,$ans_high) = split(" ",$interval); + while (my ($submission,$count) = each(%$submissions)) { + my $scaled_value = abs(($submission-$ans)/$ans); + my $bin=0; + for ($bin=0;$bin<$#bins;$bin++) { # not <= for a reason + last if ($bins[$bin]>$scaled_value); + } + if (($submission <= $ans_high) && ($submission >= $ans_low)) { + $correct[$bin]+=$count; + } else { + $incorrect[$bin]+=$count; + } + } + } + # + for (my $i=0;$i<=$#bins;$i++) { + if (! defined( $correct[$i])) { $correct[$i]=0;} + if (! defined($incorrect[$i])) { $incorrect[$i]=0;} + $correct[$i]*=100/$total; + $incorrect[$i]*=100/$total; + } + my $title = &mt('Distribution by Percent'); + my $graph = &Apache::loncommon::DrawBarGraph + ($title,'Percent difference from correct','Number of answers', + 100,['#33FF00','#FF3300'],\@labels,\@correct,\@incorrect, + {xskip=>1}); + # + my $table = $graph.''. + ''; + for (my $i=0;$i<=$#bins;$i++) { + my $lownum; + if ($i == 0) { + $lownum = 0; + } else { + $lownum = $bins[$i-1]; + } + my $highnum = $bins[$i]; + $table .= + ''. + ''. + ''. + ''. + ''.''.$/; + } + $table.= '
'.&mt('Bar').''.&mt('Range').'
'.$labels[$i].''.$lownum.' - '.$highnum.'
'.$/; + return $table; +} + +sub numerical_plot_differences { + my ($r,$responses) = @_; + # + my $total = $responses->{'_count'}; + return '' if ($total == 0); + my $minbin = undef; + my $maxbin = undef; + while (my ($interval,$submissions) = each(%$responses)) { + next if ($interval =~ /^_/); + my ($ans,$ans_low,$ans_high) = split(" ",$interval); + my $low_diff = abs($ans-$ans_low); + my $high_diff = abs($ans_high-$ans); + if (! defined($maxbin)) { $maxbin = $low_diff;} + if (! defined($minbin)) { $minbin = $low_diff;} + # + if ($minbin > $high_diff) { $minbin = $high_diff; } + if ($minbin > $low_diff ) { $minbin = $low_diff; } + # + if ($maxbin < $high_diff) { $maxbin = $high_diff; } + if ($maxbin < $low_diff ) { $maxbin = $low_diff; } + } + # + my @bins; + my @labels; + # Hmmmm, should switch to absolute difference + for (my $i=1;$i<=20;$i++) { + push(@bins,$i*$minbin/2); + push(@labels,$i); + } + # + my @correct; + my @incorrect; + while (my ($interval,$submissions) = each(%$responses)) { + next if ($interval =~ /^_/); + my ($ans,$ans_low,$ans_high) = split(" ",$interval); + while (my ($submission,$count) = each(%$submissions)) { + my $value = abs($submission-$ans); + my $bin=0; + for ($bin=0;$bin<$#bins;$bin++) { # not <= for a reason + last if ($bins[$bin]>$value); + } + if (($submission <= $ans_high) && ($submission >= $ans_low)) { + $correct[$bin]+=$count; + } else { + $incorrect[$bin]+=$count; + } + } } - my $h_scale = ($width-10)/($max-$min); # + for (my $i=0;$i<=$#bins;$i++) { + if (! defined( $correct[$i])) { $correct[$i]=0;} + if (! defined($incorrect[$i])) { $incorrect[$i]=0;} + $correct[$i]*=100/$total; + $incorrect[$i]*=100/$total; + } + my $title = &mt('Distribution by Magnitude'); + my $graph = &Apache::loncommon::DrawBarGraph + ($title,'magnitude difference from correct','Number of answers', + 100,['#33FF00','#FF3300'],\@labels,\@correct,\@incorrect,{xskip=>1}); + # + my $table = $graph.''. + ''; + for (my $i=0;$i<=$#bins;$i++) { + my $lownum; + if ($i == 0) { + $lownum = 0; + } else { + $lownum = $bins[$i-1]; + } + my $highnum = $bins[$i]; + $table .= + ''. + ''. + ''. + ''. + ''.''.$/; + } + $table.= '
'.&mt('Bar').''.&mt('Range').'
'.$labels[$i].''.$lownum.' - '.$highnum.'
'.$/; + return $table; +} + +sub numerical_response_determine_intervals { + my ($r,$resource,$partid,$respid,$students)=@_; + my $c = $r->connection(); + # + # FIX ME: Need progress dialog updates + # + # Read in the cache (if it exists) before we start timing things. + &Apache::lonstathelpers::ensure_proper_cache($resource->{'symb'}); + # + my $correct; + # %intervals differs from %answers because it may be possible for two + # students to have the same correct answer but different intervals. + my %intervals; + my %answers; + foreach my $student (@$students) { + last if ($c->aborted()); + my $sname = $student->{'username'}; + my $sdom = $student->{'domain'}; + # analyze problem + my $analysis = + &Apache::lonstathelpers::analyze_problem_as_student($resource, + $sname, + $sdom); + # make the key + my $key = $partid.'.'.$respid; + my $interval = join(' ',($analysis->{$key.'.answer'}->[0], + $analysis->{$key.'.ans_low'}->[0], + $analysis->{$key.'.ans_high'}->[0])); + $correct->{$sname.':'.$sdom} = $interval; + $intervals{$interval}++; + $answers{$analysis->{$key.'.answer'}->[0]}++; + } + &Apache::lonstathelpers::write_analysis_cache(); + return ($correct,\%intervals,\%answers); +} + +sub numerical_one_dimensional_plot { + my ($r,$width,$height,$n,$data)=@_; + # + # Compute data -> image scaling factors my $max_y = 0; - foreach (@$Data) { - $max_y = $_ if ($max_y < $_); + my $min_x = undef; + my $max_x = undef; + while (my ($answer,$count) = each(%$data)) { + $max_y = $count if ($max_y < $count); + if (! defined($min_x) || $answer < $min_x) { + $min_x = $answer; + } + if (! defined($max_x) || $answer > $max_x) { + $max_x = $answer; + } } + # + my $h_scale = ($width-10)/($max_x-$min_x); + # my $ticscale = 5; if ($max_y * $ticscale > $height/2) { $ticscale = int($height/2/$max_y); @@ -269,17 +452,16 @@ sub one_dimensional_plot { # Create the plot my $plot = qq{}; - for (my $idx=0;$idx[$idx] - $min); - my $top = $height/2-$Data->[$idx]*$ticscale; - my $bottom = $height/2+$Data->[$idx]*$ticscale; - $plot .= - &line($xloc,$top,$xloc,$bottom,'888888',1); + while (my ($answer,$count) = each(%$data)) { + my $xloc = 5+$h_scale*($answer - $min_x); + my $top = $height/2-$count*$ticscale; + my $bottom = $height/2+$count*$ticscale; + $plot .= &line($xloc,$top,$xloc,$bottom,'888888',1); } # # Put the scale on last to ensure it is on top of the data. - if ($min < 0 && $max > 0) { - my $circle_x = 5+$h_scale*abs($min); # '0' in data coordinates + if ($min_x < 0 && $max_x > 0) { + my $circle_x = 5+$h_scale*abs($min_x); # '0' in data coordinates my $r = 4; $plot .= &line(5,$height/2,$circle_x-$r,$height/2,'000000',1); $plot .= &circle($circle_x,$height/2,$r+1,'000000'); @@ -290,20 +472,18 @@ sub one_dimensional_plot { $plot .= ''; my $plotresult = &Apache::lonxml::xmlparse($r,'web',$plot); - my $title = 'Distribution of correct answers'; my $result = ''. ''. ''. - ''. + ''. ''. - ''. + ''. ''. - ''. '
'. - ''.$title.' (N='.$N.')'. - ''. + ''.&mt('Distribution of correct answers').''. + '
'.&mt('[_1] students, [_2] distinct correct answers', + $n,scalar(keys(%$data))). + '
'.&mt('Maximum number of coinciding values: [_1]',$max_y). '
'.$min.''.$min_x.''.$plotresult.''.$max.''.$max_x.'
'. - 'Maximum Number of Coinciding Values: '.$max_y. - '
'; return $result; } @@ -313,7 +493,7 @@ sub one_dimensional_plot { ## These should probably go somewhere more suitable soon. sub line { my ($x1,$y1,$x2,$y2,$color,$thickness) = @_; - return qq{$/}; + return qq{}; } sub text { @@ -486,8 +666,8 @@ sub radio_response_analysis { $no_data_text = 'No data for [_5] to [_6]'; } my $foil_choice_data = - &RR_classify_response_data($response_data,$correct, - $restriction_function); + &classify_response_data($response_data,$correct, + $restriction_function); # &Apache::lonstathelpers::log_hash_ref($foil_choice_data); my $answers; if (ref($correct)) { @@ -758,12 +938,22 @@ sub RR_create_stacked_selection_plot { return ($graph,\%count_per_foil); } + +######################################################### +######################################################### +## +## Misc routines +## +######################################################### +######################################################### + # if $correct is a hash ref, it is assumed to be indexed by student names. # the values are assumed to be hash refs with a key of 'answer'. -sub RR_classify_response_data { +sub classify_response_data { my ($full_row_data,$correct,$function) = @_; my %submission_data; my %students; + my $max=0; foreach my $row (@$full_row_data) { my %subm = &hashify_attempt($row); if (ref($correct) eq 'HASH') { @@ -775,12 +965,17 @@ sub RR_classify_response_data { if (&$function(\%subm)) { $students{$subm{'student'}}++; $submission_data{'_count'}++; + if (&submission_is_correct($subm{'award'})) { $submission_data{'_correct'}++; } - $submission_data{$subm{'correct'}}->{$subm{'submission'}}++; + + if($max<++$submission_data{$subm{'correct'}}->{$subm{'submission'}}) { + $max=$submission_data{$subm{'correct'}}->{$subm{'submission'}}; + } } } + $submission_data{'_max'} = $max; $submission_data{'_students'}=scalar(keys(%students)); return \%submission_data; }