bibliofile/parse_db.pl

   1 #!/usr/bin/perl -w
   2
   3 # Parses Bibliofile files.
   4 # Usage: parse_db.pl TITLE.DB [--ignore-indexes]
   5 # Choosing --ignore-indexes will find data you'd otherwise miss, but also grabs a lot of junk you'll need to filter out.
   6
   7 use strict;
   8 use POSIX;
   9 use Getopt::Long;
  10
  11 my $ignoreIndexes = '';
  12
  13 my $opts = GetOptions('ignore-indexes' => \$ignoreIndexes);
  14
  15 $/ = undef;
  16
  17 my $startOfColumnTypes = 8;
  18 my $startOfRealData = 4096;
  19 my $blockSize = 4096;
  20 my $initialOffset = 6;
  21 my %dataTypes = (
  22   'A' => 'Text',
  23   'N' => 'Numeric',
  24   'S' => 'Integer'
  25 );
  26
  27 my $rowLength;
  28 my @fieldLengths;
  29 my @fieldNames;
  30 my @fieldTypes;
  31
  32 my $db = $ARGV[0];
  33 my $dbd = $db . "D";
  34
  35 open (DBD, $dbd);
  36
  37 while (<DBD>) {
  38
  39   my $data = $_;
  40
  41   $rowLength = ord(substr($data, 0, 1)) + (256 * (ord(substr($data, 1, 1))));
  42   #print STDERR "Row length: $rowLength\n";
  43
  44   my $numColumns = ord substr($data, 2, 1);
  45   #print STDERR "Columns:    $numColumns\n";
  46
  47   my $namedata = substr($data, $startOfColumnTypes + ($numColumns * 7) - 2);
  48   @fieldNames = split(/\x00/, $namedata);
  49
  50   for (my $i = 0; $i < $numColumns; $i++) {
  51     $fieldTypes[$i] = substr($data, ($i * 7) + $startOfColumnTypes, 1);
  52     $fieldLengths[$i] = ord substr($data, ($i * 7) + $startOfColumnTypes + 1, 1);
  53   }
  54
  55 }
  56
  57 close(DBD);
  58
  59 print join("\t", @fieldNames) . "\n";
  60
  61 open (DB, $db);
  62
  63 my $blocks = 0;
  64
  65 while (read DB, my $data, $blockSize) {
  66   $blocks++;
  67   next if ($blocks == 1);
  68   my $maxRecords = POSIX::floor($blockSize / $rowLength);
  69   unless $ignoreIndexes {
  70     my $indexIndicator1 = ord substr($data, 1, 1);
  71     next if ($indexIndicator1 != 0);
  72     my $indexIndicator2 = ord substr($data, 7, 1);
  73     next if ($indexIndicator2 == 0);
  74   }
  75
  76   for (my $r = 0; $r < $maxRecords; $r++) {
  77
  78     my $pos = 0;
  79     my @field;
  80
  81     #print STDERR "Record " . ($r+1) . " of $maxRecords\n";
  82
  83     for (my $f = 0; $f < scalar(@fieldLengths); $f++) {
  84       $field[$f] = substr($data, $initialOffset + ($r * $rowLength) + $pos, $fieldLengths[$f]);
  85       if ($fieldTypes[$f] eq 'S') { $field[$f] = ord $field[$f]; }
  86       $pos += $fieldLengths[$f];
  87     }
  88
  89     if ($field[0] =~ m/[^\x00]/) {
  90       print join("\t", @field) . "\n";
  91     }
  92
  93   }
  94
  95 }
  96
  97 close(DB);