3 # Copyright 2009-2012, Equinox Software, Inc.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
my $conf = {};    # configuration hashref
# NOTE(review): the option-parsing step that fills $conf is not visible in
# this excerpt; it has to run before anything below reads $conf -- confirm.

# Build the exclusion hash from the matchset file, whose name is the first
# remaining command-line argument.  The second tab-separated field of each
# line is taken as a record ID; IDs below $conf->{lowerbound} are ignored.
my $matchset_file = shift @ARGV;
open my $matchset_fh, '<', $matchset_file
    or die "Can't open matchset file: $!\n";

my %exclude;
while ( my $line = <$matchset_fh> ) {
    my ( undef, $sub ) = split /\t/, $line;
    next unless defined $sub;          # no tab on this line: nothing to record
    $sub =~ s/\s//g;                   # any whitespace is extraneous

    # The lookup key used further down is always a run of digits, so a blank
    # or non-numeric ID could never match; skipping it here also avoids
    # "uninitialized"/"isn't numeric" warnings in the comparison.
    next unless $sub =~ /\A\d+\z/;

    $exclude{$sub} = 1 unless $sub < $conf->{lowerbound};
}
close $matchset_fh;

# Strip exclusions from the MARCXML file: copy input lines to the output,
# dropping (or, with --reverse, keeping only) those whose identifier is in
# the exclusion hash.  Lines without the identifier field are never copied.
open my $marc_in, '<', $conf->{input}
    or die "Can't open input file: $!\n";
open my $marc_out, '>', $conf->{output}
    or die "Can't open output file: $!\n";

RECORD:
while ( my $record = <$marc_in> ) {

    # Test the match itself.  $1 reflects the last successful match in
    # scope, so checking "defined $1" after an unchecked match is fragile.
    next RECORD
        unless $record
        =~ m/tag="$conf->{tag}".+?<subfield code="$conf->{subfield}">(\d+)</;
    my $id = $1;

    my $excluded = $exclude{$id} ? 1 : 0;
    my $wanted   = $conf->{reverse} ? $excluded : !$excluded;
    print {$marc_out} $record if $wanted;
}

close $marc_in;

# Buffered write errors only surface at close, so check it on the output.
close $marc_out or die "Can't close output file: $!\n";
# Option parsing and validation.  The enclosing sub header and several
# interior lines (including the option list passed to GetOptions) are not
# visible in this excerpt.
57 # set mode on existing filehandles
58 binmode(STDIN, ':utf8');
# Parse the command line, storing recognised options in the hashref $c.
# NOTE(review): presumably Getopt::Long's GetOptions -- the import and the
# option specifications that follow this line are not shown here.
60 my $rc = GetOptions( $c,
# Show help when parsing reported failure or the help option was given.
69 show_help() unless $rc;
70 show_help() if ($c->{help});
# Defaults: tag 903, subfield 'a'.  Because || is used, any false value
# (including 0 or an empty string) is replaced by the default.
# NOTE(review): these two lines use the file-level $conf while the
# surrounding lines use $c; presumably both refer to the same hashref --
# confirm against the caller.
72 $conf->{tag} = $conf->{tag} || 903;
73 $conf->{subfield} = $conf->{subfield} || 'a';
# Show help unless at least one positional argument remains in @ARGV and
# at least one key is present in $c.
# NOTE(review): if $c and $conf are the same hash, the defaults assigned
# above mean @keys can never be empty at this point.
75 my @keys = keys %{$c};
76 show_help() unless (@ARGV and @keys);
# Collect the names of required options whose value is false.  The test is
# on truthiness, so an explicit 0 (e.g. a lower bound of 0) counts as missing.
77 for my $key ('output', 'lowerbound', 'input')
78 { push @missing, $key unless $c->{$key} }
# Report the missing options.  NOTE(review): the condition that presumably
# guards this print is not visible in this excerpt.
80 print "Required option: ", join(', ', @missing), " missing!\n";
87 Usage is: extract_loadset -l BOUND -i INPUTXML -o OUTPUTXML MATCHSET
89 --lowerbound -l Lowest record ID which will be included in the loadset
90 --input -i MARCXML input file
91 --output -o MARCXML output file
92 --tag -t MARC tag to use as identifier (default: 903)
93 --subfield -s Subfield of --tag argument (default: 'a')
94 --reverse -r Output subordinate bibs rather than lead bibs