Annotation of loncom/metadata_database/parse_activity_log.pl, revision 1.1
1.1 ! matthew 1: #!/usr/bin/perl
! 2: #
! 3: # The LearningOnline Network
! 4: #
! 5: # $Id: parse_activity_log.pl,v 1.4 2004/08/11 18:36:31 matthew Exp $
! 6: #
! 7: # Copyright Michigan State University Board of Trustees
! 8: #
! 9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
! 10: #
! 11: # LON-CAPA is free software; you can redistribute it and/or modify
! 12: # it under the terms of the GNU General Public License as published by
! 13: # the Free Software Foundation; either version 2 of the License, or
! 14: # (at your option) any later version.
! 15: #
! 16: # LON-CAPA is distributed in the hope that it will be useful,
! 17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
! 18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! 19: # GNU General Public License for more details.
! 20: #
! 21: # You should have received a copy of the GNU General Public License
! 22: # along with LON-CAPA; if not, write to the Free Software
! 23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
! 24: #
! 25: # /home/httpd/html/adm/gpl.txt
! 26: #
! 27: # http://www.lon-capa.org/
! 28: #
! 29: ###############################################################################
! 30: #
! 31: # Expects
! 32: #
! 33: # ../key/$class.key - key file $username:$keynumber
! 34: # ../rawdata/$class.log - log file
! 35: # ../rawdata/$class.seq - sequence file
! 36: # ../data writable
! 37: # ------------------------------------------------------------------ Course log
! 38:
! 39: #
! 40: # Exit codes
! 41: # 0 Everything is okay
! 42: # 1 Another copy is running on this course
! 43: # 2 Activity log does not exist
! 44: # 3 Unable to connect to database
! 45: # 4 Unable to create database tables
! 46: # 5 Unspecified error?
! 47: #
! 48:
! 49: use strict;
! 50: use DBI;
! 51: use lib '/home/httpd/lib/perl/Apache';
! 52: use lonmysql();
! 53: use Time::HiRes();
! 54: use Getopt::Long();
! 55:
! 56: #
! 57: # Determine parameters
! 58: my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup);
! 59: &Getopt::Long::GetOptions( "course=s" => \$course,
! 60: "domain=s" => \$domain,
! 61: "help" => \$help,
! 62: "logfile=s" => \$file,
! 63: "timerun" => \$time_run,
! 64: "nocleanup" => \$nocleanup,
! 65: "drop" => \$drop);
! 66: if (! defined($course) || $help) {
! 67: print<<USAGE;
! 68: parse_activity_log.pl
! 69:
! 70: Process a lon-capa activity log into a database.
! 71: Parameters:
! 72: course Required
! 73: domain Optional
! 74: drop optional if present, drop all course
! 75: specific activity log tables.
! 76: file optional Specify the file to parse, including path
! 77: time optional if present, print out timing data
! 78: nocleanup optional if present, do not remove old files
! 79: Examples:
! 80: $0 -course=123456abcdef -domain=msu
! 81: $0 -course=123456abcdef -file=activity.log
! 82: USAGE
! 83: exit;
! 84: }
! 85:
! 86: ##
! 87: ## Set up timing code
! 88: ##
! 89: my $time_this = \¬hing;
! 90: if ($time_run) {
! 91: $time_this = \&time_action;
! 92: }
! 93: my $initial_time = Time::HiRes::time;
! 94:
! 95: ##
! 96: ## Read in configuration parameters
! 97: ##
! 98: my %perlvar;
! 99: &initialize_configuration();
! 100: if (! defined($domain) || $domain eq '') {
! 101: $domain = $perlvar{'lonDefDomain'};
! 102: }
! 103: &update_process_name($course.'@'.$domain);
! 104:
! 105: ##
! 106: ## Determine filenames
! 107: ##
! 108: my $sourcefilename; # activity log data
! 109: my $newfilename; # $sourcefilename will be renamed to this
! 110: my $sql_filename; # the mysql backup data file name.
! 111: if ($file) {
! 112: $sourcefilename = $file;
! 113: } else {
! 114: $sourcefilename = &get_filename($course,$domain);
! 115: }
! 116: $sql_filename = $sourcefilename;
! 117: $sql_filename =~ s|[^/]*$|activitylog.sql|;
! 118:
! 119: ##
! 120: ## There will only be a $newfilename file if a copy of this program is already
! 121: ## running.
! 122: my $newfilename = $sourcefilename.'.processing';
! 123: if (-e $newfilename) {
! 124: warn "$newfilename exists";
! 125: exit 2;
! 126: }
! 127:
! 128: if (-e $sourcefilename) {
! 129: rename($sourcefilename,$newfilename);
! 130: }
! 131:
! 132: ##
! 133: ## Table definitions
! 134: ##
! 135: my $prefix = $course.'_'.$domain.'_';
! 136: my $student_table = $prefix.'students';
! 137: my $student_table_def =
! 138: { id => $student_table,
! 139: permanent => 'no',
! 140: columns => [
! 141: { name => 'student_id',
! 142: type => 'MEDIUMINT UNSIGNED',
! 143: restrictions => 'NOT NULL',
! 144: auto_inc => 'yes', },
! 145: { name => 'student',
! 146: type => 'VARCHAR(100) BINARY',
! 147: restrictions => 'NOT NULL', },
! 148: ],
! 149: 'PRIMARY KEY' => ['student_id',],
! 150: };
! 151:
! 152: my $res_table = $prefix.'resource';
! 153: my $res_table_def =
! 154: { id => $res_table,
! 155: permanent => 'no',
! 156: columns => [{ name => 'res_id',
! 157: type => 'MEDIUMINT UNSIGNED',
! 158: restrictions => 'NOT NULL',
! 159: auto_inc => 'yes', },
! 160: { name => 'resource',
! 161: type => 'MEDIUMTEXT',
! 162: restrictions => 'NOT NULL'},
! 163: ],
! 164: 'PRIMARY KEY' => ['res_id'],
! 165: };
! 166:
! 167: my $action_table = $prefix.'actions';
! 168: my $action_table_def =
! 169: { id => $action_table,
! 170: permanent => 'no',
! 171: columns => [{ name => 'action_id',
! 172: type => 'MEDIUMINT UNSIGNED',
! 173: restrictions => 'NOT NULL',
! 174: auto_inc => 'yes', },
! 175: { name => 'action',
! 176: type => 'VARCHAR(100)',
! 177: restrictions => 'NOT NULL'},
! 178: ],
! 179: 'PRIMARY KEY' => ['action_id',],
! 180: };
! 181:
! 182: my $machine_table = $prefix.'machine_table';
! 183: my $machine_table_def =
! 184: { id => $machine_table,
! 185: permanent => 'no',
! 186: columns => [{ name => 'machine_id',
! 187: type => 'MEDIUMINT UNSIGNED',
! 188: restrictions => 'NOT NULL',
! 189: auto_inc => 'yes', },
! 190: { name => 'machine',
! 191: type => 'VARCHAR(100)',
! 192: restrictions => 'NOT NULL'},
! 193: ],
! 194: 'PRIMARY KEY' => ['machine_id',],
! 195: };
! 196:
! 197: my $activity_table = $prefix.'activity';
! 198: my $activity_table_def =
! 199: { id => $activity_table,
! 200: permanent => 'no',
! 201: columns => [
! 202: { name => 'res_id',
! 203: type => 'MEDIUMINT UNSIGNED',
! 204: restrictions => 'NOT NULL',},
! 205: { name => 'time',
! 206: type => 'DATETIME',
! 207: restrictions => 'NOT NULL',},
! 208: { name => 'student_id',
! 209: type => 'VARCHAR(100) BINARY',
! 210: restrictions => 'NOT NULL',},
! 211: { name => 'action_id',
! 212: type => 'VARCHAR(100) BINARY',
! 213: restrictions => 'NOT NULL',},
! 214: { name => 'idx', # This is here in case a student
! 215: type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
! 216: restrictions => 'NOT NULL', # one second. It happens, trust
! 217: auto_inc => 'yes', }, # me.
! 218: { name => 'machine_id',
! 219: type => 'VARCHAR(100) BINARY',
! 220: restrictions => 'NOT NULL',},
! 221: { name => 'action_values',
! 222: type => 'MEDIUMTEXT', },
! 223: ],
! 224: 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
! 225: };
! 226: my @Activity_Tables = ($student_table_def,$res_table_def,
! 227: $action_table_def,$machine_table_def,
! 228: $activity_table_def);
! 229:
! 230:
! 231: ##
! 232: ## End of table definitions
! 233: ##
! 234:
! 235: #
! 236: &Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
! 237: $perlvar{'lonSqlAccess'});
! 238: if (!&Apache::lonmysql::verify_sql_connection()) {
! 239: warn "Unable to connect to MySQL database.";
! 240: exit 3;
! 241: }
! 242:
! 243: if ($drop) { &drop_tables(); }
! 244: if (-e $sql_filename) {
! 245: # if ANY one of the tables does not exist, load the tables from the
! 246: # backup.
! 247: my @Current_Tables = &Apache::lonmysql::tables_in_db();
! 248: my %Found;
! 249: foreach my $tablename (@Current_Tables) {
! 250: foreach my $table (@Activity_Tables) {
! 251: if ($tablename eq $table->{'id'}) {
! 252: $Found{$tablename}++;
! 253: }
! 254: }
! 255: }
! 256: foreach my $table (@Activity_Tables) {
! 257: if (! $Found{$table->{'id'}}) {
! 258: $time_this->();
! 259: &load_backup_tables($sql_filename);
! 260: $time_this->('load backup tables');
! 261: last;
! 262: }
! 263: }
! 264: }
! 265:
! 266: # create_tables does not complain if the tables already exist
! 267: if (! &create_tables()) {
! 268: warn "Unable to create tables";
! 269: exit 4;
! 270: }
! 271:
! 272: &read_id_tables();
! 273:
! 274: ##
! 275: ## Do the main bit of work
! 276: if (-e $newfilename) {
! 277: my $result = &process_courselog($newfilename);
! 278: if (! defined($result)) {
! 279: # Something went wrong along the way...
! 280: exit 5;
! 281: } elsif ($result > 0) {
! 282: $time_this->();
! 283: &backup_tables($sql_filename);
! 284: $time_this->('write backup tables');
! 285: }
! 286: }
! 287:
! 288: ##
! 289: ## Clean up the filesystem
! 290: ##
! 291: &Apache::lonmysql::disconnect_from_db();
! 292: unlink($newfilename) if (! $nocleanup);
! 293:
! 294: if ($time_run) {
! 295: print "Overall time: ".(Time::HiRes::time - $initial_time).$/;
! 296: print &outputtimes();
! 297: }
! 298:
! 299: exit 0; # Everything is okay, so end here before it gets worse.
! 300:
! 301: ########################################################
! 302: ########################################################
! 303: ##
! 304: ## Process Course Log
! 305: ##
! 306: ########################################################
! 307: ########################################################
! 308: #
! 309: # Returns the number of lines in the activity.log file that were processed.
! 310: sub process_courselog {
! 311: my ($inputfile) = @_;
! 312: if (! open(IN,$inputfile)) {
! 313: warn "Unable to open '$inputfile' for reading";
! 314: return undef;
! 315: }
! 316: my ($linecount,$insertcount);
! 317: my $dbh = &Apache::lonmysql::get_dbh();
! 318: #
! 319: # Timing variables
! 320: my @RowData;
! 321: while (my $line=<IN>){
! 322: # last if ($linecount > 1000);
! 323: #
! 324: # Bulk storage variables
! 325: $time_this->();
! 326: chomp($line);
! 327: $linecount++;
! 328: # print $linecount++.$/;
! 329: my ($timestamp,$host,$log)=split(/\:/,$line,3);
! 330: $time_this->('splitline');
! 331: #
! 332: # $log has the actual log entries; currently still escaped, and
! 333: # %26(timestamp)%3a(url)%3a(user)%3a(domain)
! 334: # then additionally
! 335: # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
! 336: # or
! 337: # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
! 338: #
! 339: # get delimiter between timestamped entries to be &&&
! 340: $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
! 341: $log = &unescape($log);
! 342: $time_this->('translate_and_unescape');
! 343: # now go over all log entries
! 344: my $machine_id = &get_id($machine_table,'machine',$host);
! 345: foreach (split(/\&\&\&/,$log)) {
! 346: $time_this->();
! 347: my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
! 348: if (! defined($res) || $res =~ /^\s*$/) {
! 349: $res = '/adm/roles';
! 350: $action = 'log in';
! 351: }
! 352: if ($res =~ m|^/prtspool/|) {
! 353: $res = '/prtspool/';
! 354: }
! 355: if (! defined($action) || $action eq '') {
! 356: $action = 'view';
! 357: }
! 358: my $student = $uname.':'.$udom;
! 359: $time_this->('split_and_error_check');
! 360: my $student_id = &get_id($student_table,'student',$student);
! 361: my $res_id = &get_id($res_table,'resource',$res);
! 362: my $action_id = &get_id($action_table,'action',$action);
! 363: my $sql_time = &Apache::lonmysql::sqltime($time);
! 364: my $values = $dbh->quote(join('',@values));
! 365: $time_this->('get_ids');
! 366: #
! 367: my $row = [$res_id,
! 368: qq{'$sql_time'},
! 369: $student_id,
! 370: $action_id,
! 371: qq{''}, # idx
! 372: $machine_id,
! 373: $values];
! 374: push(@RowData,$row);
! 375: $time_this->('push_row');
! 376: #
! 377: }
! 378: $time_this->();
! 379: if ($linecount % 100 == 0) {
! 380: my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
! 381: undef,
! 382: \@RowData);
! 383: $time_this->('bulk_store_rows');
! 384: if (! defined($result)) {
! 385: warn "Error occured during insert.".
! 386: &Apache::lonmysql::get_error();
! 387: }
! 388: undef(@RowData);
! 389: }
! 390: }
! 391: if (@RowData) {
! 392: $time_this->();
! 393: my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
! 394: undef,
! 395: \@RowData);
! 396: $time_this->('bulk_store_rows');
! 397: if (! defined($result)) {
! 398: warn "Error occured during insert.".
! 399: &Apache::lonmysql::get_error();
! 400: }
! 401: undef(@RowData);
! 402: }
! 403: close IN;
! 404: # print "Number of lines: ".$linecount.$/;
! 405: # print "Number of inserts: ".$insertcount.$/;
! 406: return $linecount;
! 407: }
! 408:
! 409: ##
! 410: ## Timing routines
! 411: ##
! 412: {
! 413: my %Timing;
! 414: my $starttime;
! 415:
! 416: sub nothing {
! 417: return;
! 418: }
! 419:
! 420: sub time_action {
! 421: my ($key) = @_;
! 422: if (defined($key)) {
! 423: $Timing{$key}+=Time::HiRes::time-$starttime;
! 424: $Timing{'count_'.$key}++;
! 425: }
! 426: $starttime = Time::HiRes::time;
! 427: }
! 428:
! 429: sub outputtimes {
! 430: my $Str;
! 431: if ($time_run) {
! 432: $Str = "Timing Data:".$/;
! 433: while (my($k,$v) = each(%Timing)) {
! 434: next if ($k =~ /^count_/);
! 435: my $count = $Timing{'count_'.$k};
! 436: $Str .=
! 437: ' '.sprintf("%25.25s",$k).
! 438: ' '.sprintf('% 8d',$count).
! 439: ' '.sprintf('%12.5f',$v).$/;
! 440: }
! 441: }
! 442: return $Str;
! 443: }
! 444:
! 445: }
! 446:
! 447:
! 448: ##
! 449: ## Use mysqldump to store backups of the tables
! 450: ##
! 451: sub backup_tables {
! 452: my ($sql_filename) = @_;
! 453: my $command = qq{mysqldump --opt loncapa };
! 454:
! 455: foreach my $table (@Activity_Tables) {
! 456: my $tablename = $table->{'id'};
! 457: $command .= $tablename.' ';
! 458: }
! 459: $command .= '>'.$sql_filename;
! 460: warn $command.$/;
! 461: system($command);
! 462: }
! 463:
! 464: ##
! 465: ## Load in mysqldumped files
! 466: ##
! 467: sub load_backup_tables {
! 468: my ($sql_filename) = @_;
! 469: return undef if (! -e $sql_filename);
! 470: # Check for .my.cnf
! 471: my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
! 472: warn $command.$/;
! 473: system($command);
! 474: }
! 475:
! 476: ##
! 477: ##
! 478: ##
! 479: sub initialize_configuration {
! 480: # Fake it for now:
! 481: $perlvar{'lonSqlUser'} = 'www';
! 482: $perlvar{'lonSqlAccess'} = 'localhostkey';
! 483: $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
! 484: $perlvar{'lonDefDomain'} = '103';
! 485: }
! 486:
! 487: sub update_process_name {
! 488: my ($text) = @_;
! 489: $0 = 'parse_activity_log.pl: '.$text;
! 490: }
! 491:
! 492: sub get_filename {
! 493: my ($course,$domain) = @_;
! 494: my ($a,$b,$c,undef) = split('',$course,4);
! 495: return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
! 496: }
! 497:
! 498: sub create_tables {
! 499: foreach my $table (@Activity_Tables) {
! 500: my $table_id = &Apache::lonmysql::create_table($table);
! 501: if (! defined($table_id)) {
! 502: warn "Unable to create table ".$table->{'id'}.$/;
! 503: warn &Apache::lonmysql::build_table_creation_request($table).$/;
! 504: return 0;
! 505: }
! 506: }
! 507: return 1;
! 508: }
! 509:
! 510: sub drop_tables {
! 511: foreach my $table (@Activity_Tables) {
! 512: my $table_id = $table->{'id'};
! 513: &Apache::lonmysql::drop_table($table_id);
! 514: }
! 515: }
! 516:
! 517: #################################################################
! 518: #################################################################
! 519: ##
! 520: ## Database item id code
! 521: ##
! 522: #################################################################
! 523: #################################################################
! 524: { # Scoping for ID lookup code
! 525: my %IDs;
! 526:
! 527: sub read_id_tables {
! 528: foreach my $table (@Activity_Tables) {
! 529: my @Data = &Apache::lonmysql::get_rows($table->{'id'});
! 530: foreach my $row (@Data) {
! 531: $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
! 532: }
! 533: }
! 534: }
! 535:
! 536: sub get_id {
! 537: my ($table,$fieldname,$value) = @_;
! 538: if (exists($IDs{$table}->{$value})) {
! 539: return $IDs{$table}->{$value};
! 540: } else {
! 541: # insert into the table - if the item already exists, that is
! 542: # okay.
! 543: my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
! 544: if (! defined($result)) {
! 545: warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
! 546: }
! 547: # get the id
! 548: my @Data =
! 549: &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
! 550: if (@Data) {
! 551: $IDs{$table}->{$value}=$Data[0]->[0];
! 552: return $IDs{$table}->{$value};
! 553: } else {
! 554: warn "Unable to retrieve id for $table $fieldname $value".$/;
! 555: return undef;
! 556: }
! 557: }
! 558: }
! 559:
! 560: } # End of ID scoping
! 561:
! 562:
! 563: ###############################################################
! 564: ###############################################################
! 565: ##
! 566: ## The usual suspects
! 567: ##
! 568: ###############################################################
! 569: ###############################################################
! 570: sub escape {
! 571: my $str=shift;
! 572: $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
! 573: return $str;
! 574: }
! 575:
! 576: sub unescape {
! 577: my $str=shift;
! 578: $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
! 579: return $str;
! 580: }
! 581:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>