#!/usr/bin/perl
+
+# Copyright 2009-2012, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
use strict;
use warnings;
use open ':utf8';
use Unicode::Normalize;
use MARC::File::XML ( BinaryEncoding => 'utf-8' );
use Equinox::Migration::SubfieldMapper;
+use Equinox::Migration::Utils qw/normalize_oclc_number/;
# Script-wide setup: create the configuration hashref, let initialize()
# populate it, then open the two output handles used by the rest of the script.
my $conf = {}; # configuration hashref
# NOTE(review): $count and $scount look like running counters; what each one
# tallies is not visible in this excerpt -- confirm against the main loop.
my $count = 0; my $scount = 0;
# initialize() receives the (still empty) hashref; the open calls below read
# its 'output' and 'exception' entries afterwards.
initialize($conf);
# OF: main output file. The die message ends in "\n", so Perl does not append
# the script file name and line number to it.
open OF, '>', $conf->{output} or die "$0: cannot open output file $conf->{output}: $!\n";
# XF: exception file; the per-input "Processing <file>" line is printed to it.
# The error message should name $conf->{exception}, the file actually being opened.
-open XF, '>', $conf->{exception} or die "$0: cannot open exception file $conf->{output}: $!\n";
+open XF, '>', $conf->{exception} or die "$0: cannot open exception file $conf->{exception}: $!\n";
for my $file (@ARGV) {
print XF "Processing $file\n";
- my $batch = undef; my $record = undef;
- $batch = MARC::Batch->new($conf->{marctype}, $file);
+ my $batch = MARC::Batch->new($conf->{marctype}, $file);
$batch->strict_off();
$batch->warnings_off();
# oclc
$marc{oclc} = [];
- push @{ $marc{oclc} }, $record->field('001')->as_string()
- if ($record->field('001') and $record->field('003') and
- $record->field('003')->as_string() =~ /OCo{0,1}LC/);
+ if ($record->field('001') &&
+ $record->field('003') &&
+ $record->field('003')->as_string() =~ /OCo{0,1}LC/ &&
+ defined normalize_oclc_number($record->field('001')->as_string())) {
+ push @{ $marc{oclc} }, normalize_oclc_number($record->field('001')->as_string());
+ }
for ($record->field('035')) {
my $oclc = $_->subfield('a');
- push @{ $marc{oclc} }, $oclc
- if (defined $oclc and $oclc =~ /\(OCoLC\)/ and $oclc =~/([0-9]+)/);
+ if (defined $oclc &&
+ ($oclc =~ /\(OCoLC\)/ || $oclc =~ /(ocm|ocl7|ocn|on)/) &&
+ defined normalize_oclc_number($oclc)) {
+ push @{ $marc{oclc} }, normalize_oclc_number($oclc);
+ }
}
if ($record->field('999')) {
}
}
- if ($conf->{fingerprints}{edition} and $marc->{edition}) {
+ if ($conf->{fingerprints}{edition} and $marc->{edition} and $marc->{author}) {
print OF join("\t", $marc->{score}, $marc->{id}, "edition",
$marc->{item_form}, $marc->{date1},
$marc->{record_type}, $marc->{bib_lvl},
- $marc->{title}, $marc->{edition}), "\n";
+ $marc->{title}, $marc->{author}, $marc->{edition}), "\n";
}
if ($conf->{fingerprints}{issn} and $marc->{issn}) {
--tag=N -t Which tag to use (default 903)
--subfield=X -s Which subfield to use (default 'a')
--quiet -q Don't write status messages to STDOUT
- --ignoresubtitle -i Ignore 245$b and construct the title from 245$a alone.
+ --ignoresubtitle -i Ignore 245\$b and construct the title from 245\$a alone.
--fingerprints=LIST Fingerprints to generate, comma separated
Default: oclc,isbn,edition,issn,lccn,accomp,authpub