X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=mig-bin%2Fmig-stagebibs;fp=mig-bin%2Fmig-stagebibs;h=fb76b0c27e92995d524321c201079283ecf4253e;hp=0000000000000000000000000000000000000000;hb=000e8df6405da270e1ca30fd631f251b14ac0ddc;hpb=56f5223cb936c8f62a928eece4b6f40ea5bc9b06 diff --git a/mig-bin/mig-stagebibs b/mig-bin/mig-stagebibs new file mode 100755 index 0000000..fb76b0c --- /dev/null +++ b/mig-bin/mig-stagebibs @@ -0,0 +1,190 @@ +#!/usr/bin/perl + +############################################################################### +=pod + +=item B --file foo.xml + +Takes a load of bibs from a UTF-8 MARC XML file and loads them into mig staging +table of bibio_record_entry. This is done with no checking of file validity +so records should be checked before hand and cleaned. + +Takes one optional arguments: + +--source + +Sets an x_source value on the staging table to the one supplied instead of the +default of none. + +=back + +=cut + +############################################################################### + +use strict; +use warnings; + +use DBI; +#binmode STDIN, ':bytes'; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Data::Dumper; +use Pod::Usage; +use Switch; +use Cwd 'abs_path'; +use FindBin; +use UNIVERSAL; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; +pod2usage(-verbose => 1) if ! $ARGV[1]; + +my $next_arg_is_file = 0; +my $append = 0; +my $next_arg_is_source = 0; +my $next_arg_is_stage = 0; +my $stage_table = 'biblio_record_entry'; +my $source = 'default'; +my $file_is_xml = 0; +my $dbh = Mig::db_connect(); +my $infile; +my $i = 0; +my $batch; +binmode STDIN, ':utf8'; + +foreach my $arg (@ARGV) { + if ($arg eq '--file') { + $next_arg_is_file = 1; + next; + } + if ($next_arg_is_file) { + $infile = $arg; + $next_arg_is_file = 0; + next; + } + if ($arg eq '--source') { + $next_arg_is_source = 1; + next; + } + if ($next_arg_is_source) { + $source = $arg; + $next_arg_is_source = 0; + next; + } +} + +my $bre_test = check_for_table($dbh,'biblio_record_entry'); +if ($bre_test == 0) { create_child_bre($dbh); } + +my $xmig_test = check_for_column($dbh,'biblio_record_entry','x_migrate'); +if ($xmig_test == 0) { add_column($dbh,'biblio_record_entry','x_migrate','BOOLEAN DEFAULT TRUE'); } + +my $xsource_test = check_for_column($dbh,'biblio_record_entry','x_source'); +if ($xsource_test == 0) { add_column($dbh,'biblio_record_entry','x_source','TEXT'); } + +#flatten out MARC XML FILE +open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); +$i = 0; +my $record = ''; +while(my $line = <$xml>) { + if ($line =~ /^<\/?collection/) { next; } + chomp $line; + $record = $record . $line; + if ($line =~ /^<\/record/) { + stage_record($dbh,$record,$source); + $record = ''; + $i++; + if (($i % 100) == 0) { report_progress('Records stage', $i); } + } +} +close $xml; + +print "Finis.\n"; + +# beyond here be functions + +sub create_child_bre { + my $dbh = shift; + $dbh->do("DO \$\$ + DECLARE + t BOOLEAN; + BEGIN + SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = 'biblio_record_entry') INTO t; + IF t = FALSE THEN + PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA','biblio.record_entry'); + END IF; + END \$\$;"); + + return (); +} + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + +sub report_progress { + my ($msg, $counter) = @_; + if (defined $counter) { + print STDERR "$msg: $counter\n"; + } else { + print STDERR "$msg\n"; + } +} + +sub stage_record { + my $dbh = shift; + my $record = shift; + my $source = shift; + my $last_xact = "'$MIGSCHEMA'"; + $record = '$_$' . $record . '$_$'; + my $sql; + if ($source eq 'default') { $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc) VALUES ($last_xact,$record);"; } + else { $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc,x_source) VALUES ($last_xact,$record,'$source');"; } + my $sth = $dbh->prepare($sql); + $sth->execute(); + return; +} + +sub check_for_table { + my $dbh = shift; + my $table = shift; + my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub check_for_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = '$column';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub add_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $column_type = shift; + my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $column $column_type;"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = check_for_column($dbh,$table,$column); + if ($r == 0) { abort('failed to create column'); } else { return $r; } +} +