1: #!/usr/bin/perl
2: #
3: # The LearningOnline Network
4: #
5: # $Id: parse_activity_log.pl,v 1.1 2004/08/11 18:37:23 matthew Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29: ###############################################################################
30: #
31: # Expects
32: #
33: # ../key/$class.key - key file $username:$keynumber
34: # ../rawdata/$class.log - log file
35: # ../rawdata/$class.seq - sequence file
36: # ../data writable
37: # ------------------------------------------------------------------ Course log
38:
39: #
40: # Exit codes
41: # 0 Everything is okay
42: # 1 Another copy is running on this course
43: # 2 Activity log does not exist
44: # 3 Unable to connect to database
45: # 4 Unable to create database tables
46: # 5 Unspecified error?
47: #
48:
49: use strict;
50: use DBI;
51: use lib '/home/httpd/lib/perl/Apache';
52: use lonmysql();
53: use Time::HiRes();
54: use Getopt::Long();
55:
56: #
57: # Determine parameters
58: my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup);
59: &Getopt::Long::GetOptions( "course=s" => \$course,
60: "domain=s" => \$domain,
61: "help" => \$help,
62: "logfile=s" => \$file,
63: "timerun" => \$time_run,
64: "nocleanup" => \$nocleanup,
65: "drop" => \$drop);
66: if (! defined($course) || $help) {
67: print<<USAGE;
68: parse_activity_log.pl
69:
70: Process a lon-capa activity log into a database.
71: Parameters:
72: course Required
73: domain Optional
74: drop optional if present, drop all course
75: specific activity log tables.
76: file optional Specify the file to parse, including path
77: time optional if present, print out timing data
78: nocleanup optional if present, do not remove old files
79: Examples:
80: $0 -course=123456abcdef -domain=msu
81: $0 -course=123456abcdef -file=activity.log
82: USAGE
83: exit;
84: }
85:
86: ##
87: ## Set up timing code
88: ##
89: my $time_this = \¬hing;
90: if ($time_run) {
91: $time_this = \&time_action;
92: }
93: my $initial_time = Time::HiRes::time;
94:
95: ##
96: ## Read in configuration parameters
97: ##
98: my %perlvar;
99: &initialize_configuration();
100: if (! defined($domain) || $domain eq '') {
101: $domain = $perlvar{'lonDefDomain'};
102: }
103: &update_process_name($course.'@'.$domain);
104:
105: ##
106: ## Determine filenames
107: ##
108: my $sourcefilename; # activity log data
109: my $newfilename; # $sourcefilename will be renamed to this
110: my $sql_filename; # the mysql backup data file name.
111: if ($file) {
112: $sourcefilename = $file;
113: } else {
114: $sourcefilename = &get_filename($course,$domain);
115: }
116: $sql_filename = $sourcefilename;
117: $sql_filename =~ s|[^/]*$|activitylog.sql|;
118:
119: ##
120: ## There will only be a $newfilename file if a copy of this program is already
121: ## running.
122: my $newfilename = $sourcefilename.'.processing';
123: if (-e $newfilename) {
124: warn "$newfilename exists";
125: exit 2;
126: }
127:
128: if (-e $sourcefilename) {
129: rename($sourcefilename,$newfilename);
130: }
131:
132: ##
133: ## Table definitions
134: ##
135: my $prefix = $course.'_'.$domain.'_';
136: my $student_table = $prefix.'students';
137: my $student_table_def =
138: { id => $student_table,
139: permanent => 'no',
140: columns => [
141: { name => 'student_id',
142: type => 'MEDIUMINT UNSIGNED',
143: restrictions => 'NOT NULL',
144: auto_inc => 'yes', },
145: { name => 'student',
146: type => 'VARCHAR(100) BINARY',
147: restrictions => 'NOT NULL', },
148: ],
149: 'PRIMARY KEY' => ['student_id',],
150: };
151:
152: my $res_table = $prefix.'resource';
153: my $res_table_def =
154: { id => $res_table,
155: permanent => 'no',
156: columns => [{ name => 'res_id',
157: type => 'MEDIUMINT UNSIGNED',
158: restrictions => 'NOT NULL',
159: auto_inc => 'yes', },
160: { name => 'resource',
161: type => 'MEDIUMTEXT',
162: restrictions => 'NOT NULL'},
163: ],
164: 'PRIMARY KEY' => ['res_id'],
165: };
166:
167: my $action_table = $prefix.'actions';
168: my $action_table_def =
169: { id => $action_table,
170: permanent => 'no',
171: columns => [{ name => 'action_id',
172: type => 'MEDIUMINT UNSIGNED',
173: restrictions => 'NOT NULL',
174: auto_inc => 'yes', },
175: { name => 'action',
176: type => 'VARCHAR(100)',
177: restrictions => 'NOT NULL'},
178: ],
179: 'PRIMARY KEY' => ['action_id',],
180: };
181:
182: my $machine_table = $prefix.'machine_table';
183: my $machine_table_def =
184: { id => $machine_table,
185: permanent => 'no',
186: columns => [{ name => 'machine_id',
187: type => 'MEDIUMINT UNSIGNED',
188: restrictions => 'NOT NULL',
189: auto_inc => 'yes', },
190: { name => 'machine',
191: type => 'VARCHAR(100)',
192: restrictions => 'NOT NULL'},
193: ],
194: 'PRIMARY KEY' => ['machine_id',],
195: };
196:
197: my $activity_table = $prefix.'activity';
198: my $activity_table_def =
199: { id => $activity_table,
200: permanent => 'no',
201: columns => [
202: { name => 'res_id',
203: type => 'MEDIUMINT UNSIGNED',
204: restrictions => 'NOT NULL',},
205: { name => 'time',
206: type => 'DATETIME',
207: restrictions => 'NOT NULL',},
208: { name => 'student_id',
209: type => 'VARCHAR(100) BINARY',
210: restrictions => 'NOT NULL',},
211: { name => 'action_id',
212: type => 'VARCHAR(100) BINARY',
213: restrictions => 'NOT NULL',},
214: { name => 'idx', # This is here in case a student
215: type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
216: restrictions => 'NOT NULL', # one second. It happens, trust
217: auto_inc => 'yes', }, # me.
218: { name => 'machine_id',
219: type => 'VARCHAR(100) BINARY',
220: restrictions => 'NOT NULL',},
221: { name => 'action_values',
222: type => 'MEDIUMTEXT', },
223: ],
224: 'PRIMARY KEY' => ['res_id','time','student_id','action_id','idx'],
225: };
226: my @Activity_Tables = ($student_table_def,$res_table_def,
227: $action_table_def,$machine_table_def,
228: $activity_table_def);
229:
230:
231: ##
232: ## End of table definitions
233: ##
234:
235: #
236: &Apache::lonmysql::set_mysql_user_and_password($perlvar{'lonSqlUser'},
237: $perlvar{'lonSqlAccess'});
238: if (!&Apache::lonmysql::verify_sql_connection()) {
239: warn "Unable to connect to MySQL database.";
240: exit 3;
241: }
242:
243: if ($drop) { &drop_tables(); }
244: if (-e $sql_filename) {
245: # if ANY one of the tables does not exist, load the tables from the
246: # backup.
247: my @Current_Tables = &Apache::lonmysql::tables_in_db();
248: my %Found;
249: foreach my $tablename (@Current_Tables) {
250: foreach my $table (@Activity_Tables) {
251: if ($tablename eq $table->{'id'}) {
252: $Found{$tablename}++;
253: }
254: }
255: }
256: foreach my $table (@Activity_Tables) {
257: if (! $Found{$table->{'id'}}) {
258: $time_this->();
259: &load_backup_tables($sql_filename);
260: $time_this->('load backup tables');
261: last;
262: }
263: }
264: }
265:
266: # create_tables does not complain if the tables already exist
267: if (! &create_tables()) {
268: warn "Unable to create tables";
269: exit 4;
270: }
271:
272: &read_id_tables();
273:
274: ##
275: ## Do the main bit of work
276: if (-e $newfilename) {
277: my $result = &process_courselog($newfilename);
278: if (! defined($result)) {
279: # Something went wrong along the way...
280: exit 5;
281: } elsif ($result > 0) {
282: $time_this->();
283: &backup_tables($sql_filename);
284: $time_this->('write backup tables');
285: }
286: }
287:
288: ##
289: ## Clean up the filesystem
290: ##
291: &Apache::lonmysql::disconnect_from_db();
292: unlink($newfilename) if (! $nocleanup);
293:
294: if ($time_run) {
295: print "Overall time: ".(Time::HiRes::time - $initial_time).$/;
296: print &outputtimes();
297: }
298:
299: exit 0; # Everything is okay, so end here before it gets worse.
300:
301: ########################################################
302: ########################################################
303: ##
304: ## Process Course Log
305: ##
306: ########################################################
307: ########################################################
308: #
309: # Returns the number of lines in the activity.log file that were processed.
310: sub process_courselog {
311: my ($inputfile) = @_;
312: if (! open(IN,$inputfile)) {
313: warn "Unable to open '$inputfile' for reading";
314: return undef;
315: }
316: my ($linecount,$insertcount);
317: my $dbh = &Apache::lonmysql::get_dbh();
318: #
319: # Timing variables
320: my @RowData;
321: while (my $line=<IN>){
322: # last if ($linecount > 1000);
323: #
324: # Bulk storage variables
325: $time_this->();
326: chomp($line);
327: $linecount++;
328: # print $linecount++.$/;
329: my ($timestamp,$host,$log)=split(/\:/,$line,3);
330: $time_this->('splitline');
331: #
332: # $log has the actual log entries; currently still escaped, and
333: # %26(timestamp)%3a(url)%3a(user)%3a(domain)
334: # then additionally
335: # %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
336: # or
337: # %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
338: #
339: # get delimiter between timestamped entries to be &&&
340: $log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
341: $log = &unescape($log);
342: $time_this->('translate_and_unescape');
343: # now go over all log entries
344: my $machine_id = &get_id($machine_table,'machine',$host);
345: foreach (split(/\&\&\&/,$log)) {
346: $time_this->();
347: my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$_);
348: if (! defined($res) || $res =~ /^\s*$/) {
349: $res = '/adm/roles';
350: $action = 'log in';
351: }
352: if ($res =~ m|^/prtspool/|) {
353: $res = '/prtspool/';
354: }
355: if (! defined($action) || $action eq '') {
356: $action = 'view';
357: }
358: my $student = $uname.':'.$udom;
359: $time_this->('split_and_error_check');
360: my $student_id = &get_id($student_table,'student',$student);
361: my $res_id = &get_id($res_table,'resource',$res);
362: my $action_id = &get_id($action_table,'action',$action);
363: my $sql_time = &Apache::lonmysql::sqltime($time);
364: my $values = $dbh->quote(join('',@values));
365: $time_this->('get_ids');
366: #
367: my $row = [$res_id,
368: qq{'$sql_time'},
369: $student_id,
370: $action_id,
371: qq{''}, # idx
372: $machine_id,
373: $values];
374: push(@RowData,$row);
375: $time_this->('push_row');
376: #
377: }
378: $time_this->();
379: if ($linecount % 100 == 0) {
380: my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
381: undef,
382: \@RowData);
383: $time_this->('bulk_store_rows');
384: if (! defined($result)) {
385: warn "Error occured during insert.".
386: &Apache::lonmysql::get_error();
387: }
388: undef(@RowData);
389: }
390: }
391: if (@RowData) {
392: $time_this->();
393: my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
394: undef,
395: \@RowData);
396: $time_this->('bulk_store_rows');
397: if (! defined($result)) {
398: warn "Error occured during insert.".
399: &Apache::lonmysql::get_error();
400: }
401: undef(@RowData);
402: }
403: close IN;
404: # print "Number of lines: ".$linecount.$/;
405: # print "Number of inserts: ".$insertcount.$/;
406: return $linecount;
407: }
408:
409: ##
410: ## Timing routines
411: ##
412: {
413: my %Timing;
414: my $starttime;
415:
416: sub nothing {
417: return;
418: }
419:
420: sub time_action {
421: my ($key) = @_;
422: if (defined($key)) {
423: $Timing{$key}+=Time::HiRes::time-$starttime;
424: $Timing{'count_'.$key}++;
425: }
426: $starttime = Time::HiRes::time;
427: }
428:
429: sub outputtimes {
430: my $Str;
431: if ($time_run) {
432: $Str = "Timing Data:".$/;
433: while (my($k,$v) = each(%Timing)) {
434: next if ($k =~ /^count_/);
435: my $count = $Timing{'count_'.$k};
436: $Str .=
437: ' '.sprintf("%25.25s",$k).
438: ' '.sprintf('% 8d',$count).
439: ' '.sprintf('%12.5f',$v).$/;
440: }
441: }
442: return $Str;
443: }
444:
445: }
446:
447:
448: ##
449: ## Use mysqldump to store backups of the tables
450: ##
451: sub backup_tables {
452: my ($sql_filename) = @_;
453: my $command = qq{mysqldump --opt loncapa };
454:
455: foreach my $table (@Activity_Tables) {
456: my $tablename = $table->{'id'};
457: $command .= $tablename.' ';
458: }
459: $command .= '>'.$sql_filename;
460: warn $command.$/;
461: system($command);
462: }
463:
464: ##
465: ## Load in mysqldumped files
466: ##
467: sub load_backup_tables {
468: my ($sql_filename) = @_;
469: return undef if (! -e $sql_filename);
470: # Check for .my.cnf
471: my $command = 'mysql -e "SOURCE '.$sql_filename.'" loncapa';
472: warn $command.$/;
473: system($command);
474: }
475:
476: ##
477: ##
478: ##
479: sub initialize_configuration {
480: # Fake it for now:
481: $perlvar{'lonSqlUser'} = 'www';
482: $perlvar{'lonSqlAccess'} = 'localhostkey';
483: $perlvar{'lonUsersDir'} = '/home/httpd/lonUsers';
484: $perlvar{'lonDefDomain'} = '103';
485: }
486:
487: sub update_process_name {
488: my ($text) = @_;
489: $0 = 'parse_activity_log.pl: '.$text;
490: }
491:
492: sub get_filename {
493: my ($course,$domain) = @_;
494: my ($a,$b,$c,undef) = split('',$course,4);
495: return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
496: }
497:
498: sub create_tables {
499: foreach my $table (@Activity_Tables) {
500: my $table_id = &Apache::lonmysql::create_table($table);
501: if (! defined($table_id)) {
502: warn "Unable to create table ".$table->{'id'}.$/;
503: warn &Apache::lonmysql::build_table_creation_request($table).$/;
504: return 0;
505: }
506: }
507: return 1;
508: }
509:
510: sub drop_tables {
511: foreach my $table (@Activity_Tables) {
512: my $table_id = $table->{'id'};
513: &Apache::lonmysql::drop_table($table_id);
514: }
515: }
516:
517: #################################################################
518: #################################################################
519: ##
520: ## Database item id code
521: ##
522: #################################################################
523: #################################################################
524: { # Scoping for ID lookup code
525: my %IDs;
526:
527: sub read_id_tables {
528: foreach my $table (@Activity_Tables) {
529: my @Data = &Apache::lonmysql::get_rows($table->{'id'});
530: foreach my $row (@Data) {
531: $IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
532: }
533: }
534: }
535:
536: sub get_id {
537: my ($table,$fieldname,$value) = @_;
538: if (exists($IDs{$table}->{$value})) {
539: return $IDs{$table}->{$value};
540: } else {
541: # insert into the table - if the item already exists, that is
542: # okay.
543: my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
544: if (! defined($result)) {
545: warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
546: }
547: # get the id
548: my @Data =
549: &Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
550: if (@Data) {
551: $IDs{$table}->{$value}=$Data[0]->[0];
552: return $IDs{$table}->{$value};
553: } else {
554: warn "Unable to retrieve id for $table $fieldname $value".$/;
555: return undef;
556: }
557: }
558: }
559:
560: } # End of ID scoping
561:
562:
563: ###############################################################
564: ###############################################################
565: ##
566: ## The usual suspects
567: ##
568: ###############################################################
569: ###############################################################
570: sub escape {
571: my $str=shift;
572: $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
573: return $str;
574: }
575:
576: sub unescape {
577: my $str=shift;
578: $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
579: return $str;
580: }
581:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>