#!/usr/bin/perl ############################################################################### =pod =item B --file foo.mrc.xml Takes a load of bibs from a UTF-8 MARC XML file and loads them into mig staging table of bibio_record_entry_legacy. This is done with no checking of file validity so records should be checked before hand and cleaned. Takes three optional arguments: --source Sets an x_source value on the staging table to the one supplied instead of the default of none. --auth foo.mrc.xml This will load bibs into the authority_record_entry_legacy. --serial foo.mrc.xml This will load bibs into the serial_record_entry_legacy. =back =cut ############################################################################### use strict; use warnings; use DBI; #binmode STDIN, ':bytes'; use Env qw( HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR ); use Data::Dumper; use Pod::Usage; use Switch; use Cwd 'abs_path'; use FindBin; use UNIVERSAL; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; use Mig; use Getopt::Long; pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; pod2usage(-verbose => 1) if ! $ARGV[1]; my $append = 0; my $base_table = 'biblio_record_entry'; my $stage_table = 'biblio_record_entry_legacy'; my $auth = ''; my $serial = ''; my $source = 'default'; my $dbh = Mig::db_connect(); my $infile; my $i = 0; my $batch; binmode STDIN, ':utf8'; my $ret = GetOptions( 'file:s' => \$infile, 'serial:s' => \$serial, 'auth:s' => \$auth, 'source:s' => \$source, 'base_table:s' => \$base_table, 'stage_table:s' => \$stage_table, 'captions:s' => \$captions, 'debug:s' => \$debug ); #if in file is empty then fail #if auth and serial = 1 fail if ($serial == 1) { $base_table = 'authority_record_entry'; $stage_table = 'authority_record_entry_legacy'; } if ($auth == 1) { $base_table = 'serial_record_entry'; $stage_table = 'serial_record_entry_legacy'; } if ($auth == 1 and $serial == 1) { abort('are you sure you want to load these as authorities and serials?'); } my $bre_test = check_for_table($dbh,$base_table); my $bre_legacy_test = check_for_table($dbh,$stage_table); if ($bre_test == 0 and $bre_legacy_test == 0 ) { create_bre($dbh); create_child_bre($dbh); } if ($bre_test == 1 and $bre_legacy_test == 0 ) { create_child_bre($dbh); } my $xmig_test = check_for_column($dbh,$stage_table,'x_migrate'); if ($xmig_test == 0) { add_column($dbh,$stage_table,'x_migrate','BOOLEAN DEFAULT TRUE'); } my $xsource_test = check_for_column($dbh,$stage_table,'x_source'); if ($xsource_test == 0) { add_column($dbh,$stage_table,'x_source','TEXT'); } #flatten out MARC XML FILE open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); $i = 0; my $record = ''; while(my $line = <$xml>) { if ($line =~ /^<\/?collection/) { next; } chomp $line; $record = $record . $line; if ($line =~ /<\/record>$/) { stage_record($dbh,$record,$source); $record = ''; $i++; if (($i % 100) == 0) { report_progress('Records stage', $i); } } } close $xml; if ($i == 0) { print "No XML was processed, are you sure this is an XML file?\n"; } print "Finis.\n"; # beyond here be functions sub create_bre { my $dbh = shift; $dbh->do("DO \$\$ DECLARE t BOOLEAN; BEGIN SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$base_table') INTO t; IF t = FALSE THEN PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA',REGEXP_REPLACE('$base_table','_','.')); END IF; END \$\$;"); return (); } sub create_child_bre { my $dbh = shift; $dbh->do("DO \$\$ BEGIN CREATE TABLE $MIGSCHEMA.$stage_table (x_migrate BOOLEAN DEFAULT TRUE, x_source TEXT) INHERITS ($MIGSCHEMA.$base_table); END \$\$;"); return (); } sub abort { my $msg = shift; print STDERR "$0: $msg", "\n"; exit 1; } sub report_progress { my ($msg, $counter) = @_; if (defined $counter) { print STDERR "$msg: $counter\n"; } else { print STDERR "$msg\n"; } } sub stage_record { my $dbh = shift; my $record = shift; my $source = shift; my $last_xact = "'$MIGSCHEMA'"; $record = '$_$' . $record . '$_$'; my $sql; if ($source eq 'default') { $sql = "INSERT INTO $MIGSCHEMA.$stage_table (last_xact_id,marc) VALUES ($last_xact,$record);"; } else { $sql = "INSERT INTO $MIGSCHEMA.$stage_table (last_xact_id,marc,x_source) VALUES ($last_xact,$record,'$source');"; } my $sth = $dbh->prepare($sql); $sth->execute(); return; } sub check_for_table { my $dbh = shift; my $table = shift; my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; my $sth = $dbh->prepare($sql); $sth->execute(); my @sqlresult = $sth->fetchrow_array; my $r = pop @sqlresult; if ($r) { return $r; } else { return 0; } } sub check_for_column { my $dbh = shift; my $table = shift; my $column = shift; my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = '$column';"; my $sth = $dbh->prepare($sql); $sth->execute(); my @sqlresult = $sth->fetchrow_array; my $r = pop @sqlresult; if ($r) { return $r; } else { return 0; } } sub add_column { my $dbh = shift; my $table = shift; my $column = shift; my $column_type = shift; my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $column $column_type;"; my $sth = $dbh->prepare($sql); $sth->execute(); my @sqlresult = $sth->fetchrow_array; my $r = check_for_column($dbh,$table,$column); if ($r == 0) { abort('failed to create column'); } else { return $r; } }