#!/usr/bin/perl ############################################################################### =pod =item B --file foo.xml Takes a load of bibs from a UTF-8 MARC XML file and loads them into mig staging table of bibio_record_entry. This is done with no checking of file validity so records should be checked before hand and cleaned. Takes one optional arguments: --source Sets an x_source value on the staging table to the one supplied instead of the default of none. =back =cut ############################################################################### use strict; use warnings; use DBI; #binmode STDIN, ':bytes'; use Env qw( HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR ); use Data::Dumper; use Pod::Usage; use Switch; use Cwd 'abs_path'; use FindBin; use UNIVERSAL; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; use Mig; pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; pod2usage(-verbose => 1) if ! $ARGV[1]; my $next_arg_is_file = 0; my $append = 0; my $next_arg_is_source = 0; my $next_arg_is_stage = 0; my $stage_table = 'biblio_record_entry_legacy'; my $source = 'default'; my $file_is_xml = 0; my $dbh = Mig::db_connect(); my $infile; my $i = 0; my $batch; binmode STDIN, ':utf8'; foreach my $arg (@ARGV) { if ($arg eq '--file') { $next_arg_is_file = 1; next; } if ($next_arg_is_file) { $infile = $arg; $next_arg_is_file = 0; next; } if ($arg eq '--source') { $next_arg_is_source = 1; next; } if ($next_arg_is_source) { $source = $arg; $next_arg_is_source = 0; next; } } my $bre_test = check_for_table($dbh,'biblio_record_entry'); my $bre_legacy_test = check_for_table($dbh,'biblio_record_entry_legacy'); if ($bre_test == 0 and $bre_legacy_test == 0 ) { create_child_bre($dbh); rename_child_bre($dbh); } if ($bre_test == 1 and $bre_legacy_test == 0 ) { rename_child_bre($dbh); } my $xmig_test = check_for_column($dbh,'biblio_record_entry','x_migrate'); if ($xmig_test == 0) { add_column($dbh,'biblio_record_entry','x_migrate','BOOLEAN DEFAULT TRUE'); } my $xsource_test = check_for_column($dbh,'biblio_record_entry','x_source'); if ($xsource_test == 0) { add_column($dbh,'biblio_record_entry','x_source','TEXT'); } #flatten out MARC XML FILE open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); $i = 0; my $record = ''; while(my $line = <$xml>) { if ($line =~ /^<\/?collection/) { next; } chomp $line; $record = $record . $line; if ($line =~ /<\/record>$/) { stage_record($dbh,$record,$source); $record = ''; $i++; if (($i % 100) == 0) { report_progress('Records stage', $i); } } } close $xml; if ($i == 0) { print "No XML was processed, are you sure this is an XML file?\n"; } print "Finis.\n"; # beyond here be functions sub create_child_bre { my $dbh = shift; $dbh->do("DO \$\$ DECLARE t BOOLEAN; BEGIN SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = 'biblio_record_entry') INTO t; IF t = FALSE THEN PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA','biblio.record_entry'); END IF; END \$\$;"); return (); } sub rename_child_bre { my $dbh = shift; $dbh->do("DO \$\$ BEGIN ALTER TABLE biblio_record_entry RENAME TO biblio_record_entry_legacy; END \$\$;"); return (); } sub abort { my $msg = shift; print STDERR "$0: $msg", "\n"; exit 1; } sub report_progress { my ($msg, $counter) = @_; if (defined $counter) { print STDERR "$msg: $counter\n"; } else { print STDERR "$msg\n"; } } sub stage_record { my $dbh = shift; my $record = shift; my $source = shift; my $last_xact = "'$MIGSCHEMA'"; $record = '$_$' . $record . '$_$'; my $sql; if ($source eq 'default') { $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc) VALUES ($last_xact,$record);"; } else { $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc,x_source) VALUES ($last_xact,$record,'$source');"; } my $sth = $dbh->prepare($sql); $sth->execute(); return; } sub check_for_table { my $dbh = shift; my $table = shift; my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; my $sth = $dbh->prepare($sql); $sth->execute(); my @sqlresult = $sth->fetchrow_array; my $r = pop @sqlresult; if ($r) { return $r; } else { return 0; } } sub check_for_column { my $dbh = shift; my $table = shift; my $column = shift; my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = '$column';"; my $sth = $dbh->prepare($sql); $sth->execute(); my @sqlresult = $sth->fetchrow_array; my $r = pop @sqlresult; if ($r) { return $r; } else { return 0; } } sub add_column { my $dbh = shift; my $table = shift; my $column = shift; my $column_type = shift; my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $column $column_type;"; my $sth = $dbh->prepare($sql); $sth->execute(); my @sqlresult = $sth->fetchrow_array; my $r = check_for_column($dbh,$table,$column); if ($r == 0) { abort('failed to create column'); } else { return $r; } }