use MARC::Batch;
use MARC::File;
use MARC::File::XML;
+use MARC::Charset 'marc8_to_utf8';
binmode STDIN, ':bytes';
use Env qw(
HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
# MARC batch reader handle; only declared here. It is used later via
# $batch->warnings(), so it is presumably assigned in code outside this excerpt.
my $batch;
# Put STDIN under the :utf8 layer.
# NOTE(review): the file also calls `binmode STDIN, ':bytes'` earlier; this
# later call looks like it overrides that one -- confirm which layer is intended.
binmode STDIN, ':utf8';
# Capture the value returned by MARC::Charset->ignore_errors() (presumably the
# current setting -- confirm against the MARC::Charset docs), then enable it.
# NOTE(review): $ignore is not used anywhere in the visible code.
+my $ignore = MARC::Charset->ignore_errors();
+MARC::Charset->ignore_errors(1);
# Same pattern for assume_unicode: capture the returned value, then enable it.
# NOTE(review): $setting is not used anywhere in the visible code.
+my $setting = MARC::Charset->assume_unicode();
+MARC::Charset->assume_unicode(1);
# NOTE(review): repeats the ignore_errors(1) call made a few lines above;
# looks redundant -- confirm and drop one of them.
+MARC::Charset->ignore_errors(1);
+
foreach my $arg (@ARGV) {
if ($arg eq '--stage_file') {
$next_arg_is_file = 1;
$next_arg_is_source = 1;
next;
}
- if ($next_arg_is_file) {
+ if ($next_arg_is_source) {
$source = $arg;
$next_arg_is_source = 0;
next;
# Gather the warnings reported by the batch reader and fold them into a single
# single-quoted SQL literal: the warnings joined with ':' or the literal 'none'.
my @warnings = $batch->warnings();
my $warning_string;
if (@warnings) { $warning_string = "'" . join(':',@warnings) . "'"; } else { $warning_string = "'none'"; }
# Build the INSERT into <MIGSCHEMA>.biblio_record_entry_stage by interpolating
# the values directly into the SQL text.
# NOTE(review): $source and the joined warnings are wrapped in single quotes by
# hand, so a quote character inside either value would break (or alter) the
# statement. DBI placeholders (`VALUES (?,?,?)` plus arguments to execute)
# would avoid that. $xml is interpolated without quotes, so it is presumably
# already quoted before this point -- confirm before changing.
- my $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry_stage (marc,x_source,x_warnings) VALUES ($xml,$source,$warning_string);";
+ my $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry_stage (marc,x_source,x_warnings) VALUES ($xml,'$source',$warning_string);";
# Prepare and run the statement; no bind values are passed.
my $sth = $dbh->prepare($sql);
$sth->execute();
# NOTE(review): as written this reports progress for every $i that is NOT a
# multiple of 100. A periodic progress report would normally test
# `0 == $i % 100` -- confirm the intended condition.
report_progress("Records staged", $i) if 0 != $i % 100;
sub clean_marc {
my $xml = shift;
+ $xml = marc8_to_utf8($xml);
$xml =~ s/\n//sog;
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;