# subfields can't be non-alphanumeric
if ($record[$ptr] =~ /<subfield code="(.*?)"/) {
+ # Copy the capture before testing it: a successful match against
+ # /\P{IsAlnum}/ (which has no groups) resets $1 to undef, so the
+ # offending code could not be reported from $1 afterwards.
+ my $code = $1;
- if ($1 =~ /\P{IsAlnum}/ or $1 eq '') {
+ if ($code =~ /\P{IsAlnum}/ or $code eq '') {
- edit("Junk in subfield code/Null subfield code");
+ edit("Junk in subfield code/Null subfield code ($code)");
next;
}
}
message("Short leader padded");
}
}
+ if ($c->{'force-utf8'}) {
+ if ($record[$ptr] =~ m|<leader>(.........).(.+)</leader>|) {
+ $record[$ptr] = "<leader>$1a$2</leader>\n";
+ }
+ }
if ($record[$ptr] =~ m|<controlfield tag="008">(.+?)</control|) {
#pad short 008
my $content = $1;
}
# automatable subfield maladies
- $record[$ptr] =~ s/code=" ">c/code="c">/;
- $record[$ptr] =~ s/code=" ">\$/code="c">\$/;
+ if ($record[$ptr] =~ /code=" ">c/) {
+ message('Fixing probable subfield c, scenario 1');
+ $record[$ptr] =~ s/code=" ">c/code="c">/;
+ }
+ if ($record[$ptr] =~ /code=" ">\$/) {
+ message('Fixing probable subfield c, scenario 2');
+ $record[$ptr] =~ s/code=" ">\$/code="c">\$/;
+ }
if ($c->{'fix-subfield'}) {
- $record[$ptr] =~ s/code="&">/code="$c->{'fix-subfield'}">/;
- $record[$ptr] =~ s/code="\P{IsAlnum}">/code="$c->{'fix-subfield'}">/;
- $record[$ptr] =~ s/code="">/code="$c->{'fix-subfield'}">/;
+ # Each repair below replaces an unusable subfield code with the value
+ # given via --fix-subfield, logging what was changed.
+ if ($record[$ptr] =~ /code="&">/) {
+ message('Fixing & for subfield code');
+ $record[$ptr] =~ s/code="&">/code="$c->{'fix-subfield'}">/;
+ }
+ # Stay inside the attribute value: [^"] keeps the match from running
+ # past the closing quote into element text or a later tag on the line,
+ # which the substitution would otherwise swallow.
+ if ($record[$ptr] =~ /code="([^"]*[^"\p{IsAlnum}][^"]*)">/) {
+ message("Fixing non-alphanumeric subfield code: $1 -> " . $c->{'fix-subfield'});
+ $record[$ptr] =~ s/code="[^"]*[^"\p{IsAlnum}][^"]*">/code="$c->{'fix-subfield'}">/;
+ }
+ if ($record[$ptr] =~ /code="">/) {
+ message('Fixing null subfield code');
+ $record[$ptr] =~ s/code="">/code="$c->{'fix-subfield'}">/;
+ }
}
}
return 0;
'original-tag|ot=i',
'original-subfield|os=s',
'fix-subfield|fs=s',
+ 'force-utf8',
'script',
'no-strip9',
'trashfile|t=s',
and renumbering is in effect, an old-to-new mapping
file (old2new.map) will be generated.
+ --force-utf8 Rewrite each record so that it describes itself as
+ UTF-8 encoded
--autoscrub -a Automatically remove non-numeric tags in data
--fix-subfield -fs Subfield code to use in place of non-alphanumeric
or empty subfield codes