Annotation of loncom/metadata_database/parse_activity_log.pl, revision 1.2
1.1 matthew 1: #!/usr/bin/perl
2: #
3: # The LearningOnline Network
4: #
1.2 ! matthew 5: # $Id: parse_activity_log.pl,v 1.1 2004/08/11 18:37:23 matthew Exp $
1.1 matthew 6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29: ###############################################################################
30: #
31: # Expects
32: #
33: # ../key/$class.key - key file $username:$keynumber
34: # ../rawdata/$class.log - log file
35: # ../rawdata/$class.seq - sequence file
36: # ../data writable
37: # ------------------------------------------------------------------ Course log
38:
39: #
40: # Exit codes
41: # 0 Everything is okay
42: # 1 Another copy is running on this course
43: # 2 Activity log does not exist
44: # 3 Unable to connect to database
45: # 4 Unable to create database tables
46: # 5 Unspecified error?
47: #
48:
49: use strict;
50: use DBI;
51: use lib '/home/httpd/lib/perl/Apache';
52: use lonmysql();
53: use Time::HiRes();
54: use Getopt::Long();
55:
56: #
57: # Determine parameters
1.2 ! matthew 58: my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log);
1.1 matthew 59: &Getopt::Long::GetOptions( "course=s" => \$course,
60: "domain=s" => \$domain,
61: "help" => \$help,
62: "logfile=s" => \$file,
63: "timerun" => \$time_run,
64: "nocleanup" => \$nocleanup,
1.2 ! matthew 65: "drop" => \$drop,
! 66: "log" => \$log);
1.1 matthew 67: if (! defined($course) || $help) {
68: print<<USAGE;
69: parse_activity_log.pl
70:
71: Process a lon-capa activity log into a database.
72: Parameters:
73: course Required
74: domain Optional
75: drop optional if present, drop all course
76: specific activity log tables.
77: file optional Specify the file to parse, including path
78: time optional if present, print out timing data
79: nocleanup optional if present, do not remove old files
1.2 ! matthew 80: log optional if present, prepare log file of activity
1.1 matthew 81: Examples:
82: $0 -course=123456abcdef -domain=msu
83: $0 -course=123456abcdef -file=activity.log
84: USAGE
85: exit;
86: }
87:
88: ##
89: ## Set up timing code
90: ##
91: my $time_this = \¬hing;
92: if ($time_run) {
93: $time_this = \&time_action;
94: }
95: my $initial_time = Time::HiRes::time;
96:
97: ##
1.2 ! matthew 98: ## Set up logging code
! 99: ##
! 100: my $logthis = \¬hing;
! 101: if ($log) {
! 102: my $logfile = "/tmp/parse_activity_log.log.".time;
! 103: print STDERR "$0: logging to $logfile".$/;
! 104: if (! open(LOGFILE,">$logfile")) {
! 105: die "Unable to open $logfile for writing. Run aborted.";
! 106: } else {
! 107: $logthis = \&log_to_file;
! 108: }
! 109: }
! 110: ##
1.1 matthew 111: ## Read in configuration parameters
112: ##
113: my %perlvar;
114: &initialize_configuration();
115: if (! defined($domain) || $domain eq '') {
116: $domain = $perlvar{'lonDefDomain'};
117: }
118: &update_process_name($course.'@'.$domain);
119:
120: ##
121: ## Determine filenames
122: ##
123: my $sourcefilename; # activity log data
124: my $newfilename; # $sourcefilename will be renamed to this
125: my $sql_filename; # the mysql backup data file name.
126: if ($file) {
127: $sourcefilename = $file;
128: } else {
129: $sourcefilename = &get_filename($course,$domain);
130: }
131: $sql_filename = $sourcefilename;
1.2 ! matthew 132: $sql_filename =~ s|[^/]*$|activity.log.sql|;
1.1 matthew 133:
134: ##
135: ## There will only be a $newfilename file if a copy of this program is already
136: ## running.
137: my $newfilename = $sourcefilename.'.processing';
138: if (-e $newfilename) {
139: warn "$newfilename exists";
1.2 ! matthew 140: $logthis->($newfilename.' exists');
1.1 matthew 141: exit 2;
142: }
143:
144: if (-e $sourcefilename) {
145: rename($sourcefilename,$newfilename);
1.2 ! matthew 146: $logthis->("renamed $sourcefilename to $newfilename");
1.1 matthew 147: }
148:
149: ##
150: ## Table definitions
151: ##
152: my $prefix = $course.'_'.$domain.'_';
153: my $student_table = $prefix.'students';
154: my $student_table_def =
155: { id => $student_table,
156: permanent => 'no',
157: columns => [
158: { name => 'student_id',
159: type => 'MEDIUMINT UNSIGNED',
160: restrictions => 'NOT NULL',
161: auto_inc => 'yes', },
162: { name => 'student',
163: type => 'VARCHAR(100) BINARY',
164: restrictions => 'NOT NULL', },
165: ],
166: 'PRIMARY KEY' => ['student_id',],
167: };
168:
169: my $res_table = $prefix.'resource';
170: my $res_table_def =
171: { id => $res_table,
172: permanent => 'no',
173: columns => [{ name => 'res_id',
174: type => 'MEDIUMINT UNSIGNED',
175: restrictions => 'NOT NULL',
176: auto_inc => 'yes', },
177: { name => 'resource',
178: type => 'MEDIUMTEXT',
179: restrictions => 'NOT NULL'},
180: ],
181: 'PRIMARY KEY' => ['res_id'],
182: };
183:
184: my $action_table = $prefix.'actions';
185: my $action_table_def =
186: { id => $action_table,
187: permanent => 'no',
188: columns => [{ name => 'action_id',
189: type => 'MEDIUMINT UNSIGNED',
190: restrictions => 'NOT NULL',
191: auto_inc => 'yes', },
192: { name => 'action',
193: type => 'VARCHAR(100)',
194: restrictions => 'NOT NULL'},
195: ],
196: 'PRIMARY KEY' => ['action_id',],
197: };
198:
199: my $machine_table = $prefix.'machine_table';
200: my $machine_table_def =
201: { id => $machine_table,
202: permanent => 'no',
203: columns => [{ name => 'machine_id',
204: type => 'MEDIUMINT UNSIGNED',
205: restrictions => 'NOT NULL',
206: auto_inc => 'yes', },
207: { name => 'machine',
208: type => 'VARCHAR(100)',
209: restrictions => 'NOT NULL'},
210: ],
211: 'PRIMARY KEY' => ['machine_id',],
212: };
213:
214: my $activity_table = $prefix.'activity';
215: my $activity_table_def =
216: { id => $activity_table,
217: permanent => 'no',
218: columns => [
219: { name => 'res_id',
220: type => 'MEDIUMINT UNSIGNED',
221: restrictions => 'NOT NULL',},
222: { name => 'time',
223: type => 'DATETIME',
224: restrictions => 'NOT NULL',},
225: { name => 'student_id',
1.2 ! matthew 226: type => 'MEDIUMINT UNSIGNED',
1.1 matthew 227: restrictions => 'NOT NULL',},
228: { name => 'action_id',
1.2 ! matthew 229: type => 'MEDIUMINT UNSIGNED',
1.1 matthew 230: restrictions => 'NOT NULL',},
231: { name => 'idx', # This is here in case a student
232: type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
233: restrictions => 'NOT NULL', # one second. It happens, trust
234: auto_inc => 'yes', }, # me.
235: { name => 'machine_id',
1.2 ! matthew 236: type => 'MEDIUMINT UNSIGNED',
1.1 matthew 237: restrictions => 'NOT NULL',},
238: { name => 'action_values',
239: type => 'MEDIUMTEXT', },
240: ],
241: 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
242: };
243: my @Activity_Tables = ($student_table_def,$res_table_def,
244: $action_table_def,$machine_table_def,
245: $activity_table_def);
246:
247:
248: ##
249: ## End of table definitions
250: ##
251:
252: #
253: &Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
254: $perlvar{'lonSqlAccess'});
255: if (!&Apache::lonmysql::verify_sql_connection()) {
256: warn "Unable to connect to MySQL database.";
1.2 ! matthew 257: $logthis->("Unable to connect to MySQL database.");
1.1 matthew 258: exit 3;
259: }
260:
1.2 ! matthew 261: if ($drop) { &drop_tables(); $logthis->('dropped tables'); }
1.1 matthew 262: if (-e $sql_filename) {
1.2 ! matthew 263: $logthis->('reading in from '.$sql_filename);
1.1 matthew 264: # if ANY one of the tables does not exist, load the tables from the
265: # backup.
266: my @Current_Tables = &Apache::lonmysql::tables_in_db();
267: my %Found;
268: foreach my $tablename (@Current_Tables) {
269: foreach my $table (@Activity_Tables) {
270: if ($tablename eq $table->{'id'}) {
271: $Found{$tablename}++;
272: }
273: }
274: }
275: foreach my $table (@Activity_Tables) {
276: if (! $Found{$table->{'id'}}) {
277: $time_this->();
278: &load_backup_tables($sql_filename);
279: $time_this->('load backup tables');
280: last;
281: }
282: }
283: }
284:
285: # create_tables does not complain if the tables already exist
286: if (! &create_tables()) {
287: warn "Unable to create tables";
1.2 ! matthew 288: $logthis->('Unable to create tables');
1.1 matthew 289: exit 4;
290: }
291:
1.2 ! matthew 292: $logthis->('reading id tables');
1.1 matthew 293: &read_id_tables();
1.2 ! matthew 294: $logthis->('finished reading id tables');
1.1 matthew 295:
296: ##
297: ## Do the main bit of work
298: if (-e $newfilename) {
299: my $result = &process_courselog($newfilename);
300: if (! defined($result)) {
301: # Something went wrong along the way...
1.2 ! matthew 302: $logthis->('process_courselog returned undef');
1.1 matthew 303: exit 5;
304: } elsif ($result > 0) {
305: $time_this->();
1.2 ! matthew 306: $logthis->('process_courselog returned '.$result.' backup up tables');
1.1 matthew 307: &backup_tables($sql_filename);
308: $time_this->('write backup tables');
309: }
310: }
311:
312: ##
313: ## Clean up the filesystem
314: ##
315: &Apache::lonmysql::disconnect_from_db();
316: unlink($newfilename) if (! $nocleanup);
317:
318: if ($time_run) {
1.2 ! matthew 319: my $elapsed_time = Time::HiRes::time - $initial_time;
! 320: print "Overall time: ".$elapsed_time.$/;
1.1 matthew 321: print &outputtimes();
1.2 ! matthew 322: $logthis->("Overall time: ".$elapsed_time);
! 323: $logthis->(&outputtimes());
! 324: }
! 325:
! 326: if ($log) {
! 327: close LOGFILE;
1.1 matthew 328: }
329:
330: exit 0; # Everything is okay, so end here before it gets worse.
331:
332: ########################################################
333: ########################################################
334: ##
335: ## Process Course Log
336: ##
337: ########################################################
338: ########################################################
339: #
340: # Returns the number of lines in the activity.log file that were processed.
341: sub process_courselog {
342: my ($inputfile) = @_;
343: if (! open(IN,$inputfile)) {
344: warn "Unable to open '$inputfile' for reading";
1.2 ! matthew 345: $logthis->("Unable to open '$inputfile' for reading");
1.1 matthew 346: return undef;
347: }
348: my ($linecount,$insertcount);
349: my $dbh = &Apache::lonmysql::get_dbh();
350: #
351: # Timing variables
352: my @RowData;
353: while (my $line=<IN>){
354: # last if ($linecount > 1000);
355: #
356: # Bulk storage variables
357: $time_this->();
358: chomp($line);
359: $linecount++;
360: # print $linecount++.$/;
361: my ($timestamp,$host,$log)=split(/\:/,$line,3);
362: $time_this->('splitline');
363: #
364: # $log has the actual log entries; currently still escaped, and
365: # %26(timestamp)%3a(url)%3a(user)%3a(domain)
366: # then additionally
367: # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
368: # or
369: # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
370: #
371: # get delimiter between timestamped entries to be &&&
372: $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
373: $log = &unescape($log);
374: $time_this->('translate_and_unescape');
375: # now go over all log entries
1.2 ! matthew 376: if (! defined($host)) { $host = 'unknown'; }
1.1 matthew 377: my $machine_id = &get_id($machine_table,'machine',$host);
1.2 ! matthew 378: my $prevchunk = 'none';
! 379: foreach my $chunk (split(/\&\&\&/,$log)) {
! 380: my $warningflag = '';
1.1 matthew 381: $time_this->();
1.2 ! matthew 382: my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk);
! 383: my $student = $uname.':'.$udom;
1.1 matthew 384: if (! defined($res) || $res =~ /^\s*$/) {
385: $res = '/adm/roles';
1.2 ! matthew 386: $action = 'LOGIN';
! 387: # $warningflag .= 'res';
1.1 matthew 388: }
389: if ($res =~ m|^/prtspool/|) {
390: $res = '/prtspool/';
391: }
392: if (! defined($action) || $action eq '') {
1.2 ! matthew 393: $action = 'VIEW';
! 394: # $warningflag .= 'action';
1.1 matthew 395: }
1.2 ! matthew 396: if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) {
! 397: $warningflag .= 'action';
! 398: }
! 399: if (! defined($student) || $student eq ':') {
! 400: $student = 'unknown';
! 401: $warningflag .= 'student';
! 402: }
! 403: if (! defined($res) || $res =~ /^\s*$/) {
! 404: $res = 'unknown';
! 405: $warningflag .= 'res';
! 406: }
! 407: if (! defined($action) || $action =~ /^\s*$/) {
! 408: $action = 'unknown';
! 409: $warningflag .= 'action';
! 410: }
! 411: if (! defined($time) || $time !~ /^\d+$/) {
! 412: $time = 0;
! 413: $warningflag .= 'time';
! 414: }
! 415: #
1.1 matthew 416: $time_this->('split_and_error_check');
417: my $student_id = &get_id($student_table,'student',$student);
418: my $res_id = &get_id($res_table,'resource',$res);
419: my $action_id = &get_id($action_table,'action',$action);
420: my $sql_time = &Apache::lonmysql::sqltime($time);
1.2 ! matthew 421: #
! 422: if (! defined($student_id) || $student_id eq '') {
! 423: $warningflag.='student_id';
! 424: }
! 425: if (! defined($res_id) || $res_id eq '') {
! 426: $warningflag.='res_id';
! 427: }
! 428: if (! defined($action_id) || $action_id eq '') {
! 429: $warningflag.='action_id';
! 430: }
! 431: if ($warningflag ne '') {
! 432: $logthis->('warningflag ('.$warningflag.') on chunk '.
! 433: $/.$chunk.$/.'prevchunk = '.$/.$prevchunk);
! 434: $prevchunk .= $chunk;
! 435: next; # skip this chunk
! 436: }
! 437: #
1.1 matthew 438: my $values = $dbh->quote(join('',@values));
439: $time_this->('get_ids');
440: #
441: my $row = [$res_id,
442: qq{'$sql_time'},
443: $student_id,
444: $action_id,
445: qq{''}, # idx
446: $machine_id,
447: $values];
448: push(@RowData,$row);
449: $time_this->('push_row');
1.2 ! matthew 450: $prevchunk = $chunk;
1.1 matthew 451: #
452: }
453: $time_this->();
1.2 ! matthew 454: if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) {
1.1 matthew 455: my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
456: undef,
457: \@RowData);
1.2 ! matthew 458: # $logthis->('result = '.$result);
1.1 matthew 459: $time_this->('bulk_store_rows');
460: if (! defined($result)) {
1.2 ! matthew 461: my $error = &Apache::lonmysql::get_error();
! 462: warn "Error occured during insert.".$error;
! 463: $logthis->('error = '.$error);
1.1 matthew 464: }
465: undef(@RowData);
466: }
467: }
468: if (@RowData) {
469: $time_this->();
1.2 ! matthew 470: $logthis->('storing '.$linecount);
1.1 matthew 471: my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
472: undef,
473: \@RowData);
1.2 ! matthew 474: $logthis->('result = '.$result);
1.1 matthew 475: $time_this->('bulk_store_rows');
476: if (! defined($result)) {
1.2 ! matthew 477: my $error = &Apache::lonmysql::get_error();
! 478: warn "Error occured during insert.".$error;
! 479: $logthis->('error = '.$error);
1.1 matthew 480: }
481: undef(@RowData);
482: }
483: close IN;
484: # print "Number of lines: ".$linecount.$/;
485: # print "Number of inserts: ".$insertcount.$/;
486: return $linecount;
487: }
488:
1.2 ! matthew 489:
! 490: ##
! 491: ## Somtimes, instead of doing something, doing nothing is appropriate.
! 492: sub nothing {
! 493: return;
! 494: }
! 495:
! 496: ##
! 497: ## Logging routine
! 498: ##
! 499: sub log_to_file {
! 500: my ($input)=@_;
! 501: print LOGFILE $input.$/;
! 502: }
! 503:
1.1 matthew 504: ##
505: ## Timing routines
506: ##
507: {
508: my %Timing;
509: my $starttime;
510:
511: sub time_action {
512: my ($key) = @_;
513: if (defined($key)) {
514: $Timing{$key}+=Time::HiRes::time-$starttime;
515: $Timing{'count_'.$key}++;
516: }
517: $starttime = Time::HiRes::time;
518: }
519:
520: sub outputtimes {
521: my $Str;
522: if ($time_run) {
523: $Str = "Timing Data:".$/;
524: while (my($k,$v) = each(%Timing)) {
525: next if ($k =~ /^count_/);
526: my $count = $Timing{'count_'.$k};
527: $Str .=
528: ' '.sprintf("%25.25s",$k).
529: ' '.sprintf('% 8d',$count).
530: ' '.sprintf('%12.5f',$v).$/;
531: }
532: }
533: return $Str;
534: }
535:
536: }
537:
538:
539: ##
540: ## Use mysqldump to store backups of the tables
541: ##
542: sub backup_tables {
543: my ($sql_filename) = @_;
544: my $command = qq{mysqldump --opt loncapa };
545:
546: foreach my $table (@Activity_Tables) {
547: my $tablename = $table->{'id'};
548: $command .= $tablename.' ';
549: }
550: $command .= '>'.$sql_filename;
1.2 ! matthew 551: $logthis->($command);
1.1 matthew 552: system($command);
553: }
554:
555: ##
556: ## Load in mysqldumped files
557: ##
558: sub load_backup_tables {
559: my ($sql_filename) = @_;
560: return undef if (! -e $sql_filename);
561: # Check for .my.cnf
562: my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
1.2 ! matthew 563: $logthis->('loading previously saved sql table'.$/.$command);
1.1 matthew 564: system($command);
565: }
566:
567: ##
568: ##
569: ##
570: sub initialize_configuration {
571: # Fake it for now:
572: $perlvar{'lonSqlUser'} = 'www';
573: $perlvar{'lonSqlAccess'} = 'localhostkey';
574: $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
575: $perlvar{'lonDefDomain'} = '103';
576: }
577:
578: sub update_process_name {
579: my ($text) = @_;
580: $0 = 'parse_activity_log.pl: '.$text;
581: }
582:
583: sub get_filename {
584: my ($course,$domain) = @_;
585: my ($a,$b,$c,undef) = split('',$course,4);
586: return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
587: }
588:
589: sub create_tables {
590: foreach my $table (@Activity_Tables) {
591: my $table_id = &Apache::lonmysql::create_table($table);
592: if (! defined($table_id)) {
593: warn "Unable to create table ".$table->{'id'}.$/;
594: warn &Apache::lonmysql::build_table_creation_request($table).$/;
595: return 0;
596: }
597: }
598: return 1;
599: }
600:
601: sub drop_tables {
602: foreach my $table (@Activity_Tables) {
603: my $table_id = $table->{'id'};
604: &Apache::lonmysql::drop_table($table_id);
605: }
606: }
607:
608: #################################################################
609: #################################################################
610: ##
611: ## Database item id code
612: ##
613: #################################################################
614: #################################################################
615: { # Scoping for ID lookup code
616: my %IDs;
617:
618: sub read_id_tables {
619: foreach my $table (@Activity_Tables) {
620: my @Data = &Apache::lonmysql::get_rows($table->{'id'});
621: foreach my $row (@Data) {
622: $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
623: }
624: }
625: }
626:
627: sub get_id {
628: my ($table,$fieldname,$value) = @_;
629: if (exists($IDs{$table}->{$value})) {
630: return $IDs{$table}->{$value};
631: } else {
632: # insert into the table - if the item already exists, that is
633: # okay.
634: my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
635: if (! defined($result)) {
636: warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
637: }
638: # get the id
639: my @Data =
640: &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
641: if (@Data) {
642: $IDs{$table}->{$value}=$Data[0]->[0];
643: return $IDs{$table}->{$value};
644: } else {
1.2 ! matthew 645: $logthis->("Unable to retrieve id for $table $fieldname $value");
1.1 matthew 646: return undef;
647: }
648: }
649: }
650:
651: } # End of ID scoping
652:
653:
654: ###############################################################
655: ###############################################################
656: ##
657: ## The usual suspects
658: ##
659: ###############################################################
660: ###############################################################
661: sub escape {
662: my $str=shift;
663: $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
664: return $str;
665: }
666:
667: sub unescape {
668: my $str=shift;
669: $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
670: return $str;
671: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>