authorities - enabled DOM indexing
author: Galen Charlton <galen.charlton@liblime.com>
Thu, 7 Feb 2008 06:11:32 +0000 (00:11 -0600)
committer: Joshua Ferraro <jmf@liblime.com>
Fri, 8 Feb 2008 11:48:51 +0000 (05:48 -0600)
All new authority features will be based on the DOM indexing.

To update an existing installation, do the following:

[1] run perl Makefile.PL
[2] make
[3] make update_zebra_conf
[4] copy the new koha-conf.xml to $KOHA_CONF

Signed-off-by: Chris Cormack <crc@liblime.com>
Signed-off-by: Joshua Ferraro <jmf@liblime.com>

Makefile.PL
etc/koha-conf.xml
etc/zebradb/authorities/etc/dom-config.xml [new file with mode: 0644]
etc/zebradb/retrieval-info-auth-dom.xml [new file with mode: 0644]
etc/zebradb/retrieval-info-auth-grs1.xml [new file with mode: 0644]
etc/zebradb/xsl/identity.xsl [new file with mode: 0644]
etc/zebradb/xsl/zebra.xsl [new file with mode: 0644]
etc/zebradb/zebra-authorities-dom.cfg [new file with mode: 0644]
rewrite-config.PL

index 01122e4..bf08ff4 100644 (file)
@@ -370,6 +370,7 @@ my %config_defaults = (
   'DB_PASS'           => 'katikoan',
   'INSTALL_ZEBRA'     => 'yes',
   'INSTALL_SRU'       => 'yes',
+  'AUTH_INDEX_MODE'   => 'grs1',
   'ZEBRA_MARC_FORMAT' => 'marc21',
   'ZEBRA_LANGUAGE'    => 'en',
   'ZEBRA_USER'        => 'kohauser',
@@ -411,6 +412,7 @@ my %valid_config_values = (
   'DB_TYPE' => { 'mysql' => 1, 'Pg' => 1 },
   'INSTALL_ZEBRA' => { 'yes' => 1, 'no' => 1 },
   'INSTALL_SRU' => { 'yes' => 1, 'no' => 1 },
+  'AUTH_INDEX_MODE' => { 'grs1' => 1, 'dom' => 1 },
   'ZEBRA_MARC_FORMAT' => { 'marc21' => 1, 'unimarc' => 1 }, # FIXME should generate from contents of distributation
   'ZEBRA_LANGUAGE'    => { 'en' => 1, 'fr' => 1 }, # FIXME should generate from contents of distribution
 );
@@ -436,8 +438,11 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") {
         'blib/ZEBRA_CONF_DIR/etc/passwd',
         'blib/ZEBRA_CONF_DIR/zebra-biblios.cfg',
         'blib/ZEBRA_CONF_DIR/zebra-authorities.cfg',
+        'blib/ZEBRA_CONF_DIR/zebra-authorities-dom.cfg',
         'blib/ZEBRA_CONF_DIR/explain-authorities.xml',
-        'blib/ZEBRA_CONF_DIR/explain-biblios.xml'
+        'blib/ZEBRA_CONF_DIR/explain-biblios.xml',
+        'blib/ZEBRA_CONF_DIR/retrieval-info-auth-grs1.xml',
+        'blib/ZEBRA_CONF_DIR/retrieval-info-auth-dom.xml',
     );
     if ($config{'INSTALL_MODE'} ne 'dev') {
         push @{ $pl_files->{'rewrite-config.PL'} }, (
@@ -445,6 +450,9 @@ if ($config{'INSTALL_ZEBRA'} eq "yes") {
             'blib/SCRIPT_DIR/koha-zebraqueue-ctl.sh',
         );
     }
+    $config{'ZEBRA_AUTH_CFG'} = $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'zebra-authorities-dom.cfg' : 'zebra-authorities.cfg';
+    $config{'AUTH_RETRIEVAL_CFG'} = 
+        $config{'AUTH_INDEX_MODE'} eq 'dom' ? 'retrieval-info-auth-dom.xml' : 'retrieval-info-auth-grs1.xml';
 }
 
 if ($config{'INSTALL_MODE'} ne "dev") {
@@ -864,7 +872,20 @@ records.
 Primary language for Zebra indexing);
         $msg .= _add_valid_values_disp('ZEBRA_LANGUAGE', $valid_values);
         $config{'ZEBRA_LANGUAGE'} = _get_value('ZEBRA_LANGUAGE', $msg, $defaults->{'ZEBRA_LANGUAGE'}, $valid_values);
-
+   
+        $msg = q(
+Koha can use one of  two different indexing modes 
+for the MARC authorities records:
+
+grs1 - uses the Zebra GRS-1 filter, available 
+       for legacy support
+dom  - uses the DOM XML filter; offers improved
+       functionality.
+
+Authorities indexing mode);
+        $msg .= _add_valid_values_disp('AUTH_INDEX_MODE', $valid_values);
+        $config{'AUTH_INDEX_MODE'} = _get_value('AUTH_INDEX_MODE', $msg, $defaults->{'AUTH_INDEX_MODE'}, $valid_values);
+       
         $msg = q(
 Please specify Zebra database user);
         $config{'ZEBRA_USER'} = _get_value('ZEBRA_USER', $msg, $defaults->{'ZEBRA_USER'}, $valid_values);
index 4ef0eb7..6b2b9a3 100644 (file)
 <!-- AUTHORITY RECORDS -->
 <server id="authorityserver"  listenref="authorityserver" >
     <directory>__ZEBRA_DATA_DIR__/authorities</directory>
-    <config>__ZEBRA_CONF_DIR__/zebra-authorities.cfg</config>
+    <config>__ZEBRA_CONF_DIR__/__ZEBRA_AUTH_CFG__</config>
     <cql2rpn>__ZEBRA_CONF_DIR__/pqf.properties</cql2rpn>
      <!-- <docpath>xsl</docpath> -->
      <!-- <stylesheet>xsl/default.xsl</stylesheet> -->
      <!-- <maximumrecordsize>2000000</maximumrecordsize> -->
+    <xi:include href="__KOHA_CONF_DIR__/zebradb/__AUTH_RETRIEVAL_CFG__"
+                xmlns:xi="http://www.w3.org/2001/XInclude">
+      <xi:fallback>
      <retrievalinfo>
        <retrieval syntax="usmarc" name="F"/>
        <retrieval syntax="usmarc" name="B"/>
          </backend>
        </retrieval>
      </retrievalinfo>
+      </xi:fallback>
+    </xi:include>
     <xi:include href="__KOHA_CONF_DIR__/zebradb/explain-authorities.xml"
                 xmlns:xi="http://www.w3.org/2001/XInclude">
       <xi:fallback>
diff --git a/etc/zebradb/authorities/etc/dom-config.xml b/etc/zebradb/authorities/etc/dom-config.xml
new file mode 100644 (file)
index 0000000..8691616
--- /dev/null
@@ -0,0 +1,44 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- 
+$Id: dom-config.xml,v 1.1 2007-12-13 17:42:28 adam Exp $
+   Copyright (C) 1995-2006
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+-->
+<dom>
+  <extract name="index">
+    <xslt stylesheet="authority-zebra-indexdefs.xsl"/>
+  </extract>
+  <retrieve name="F">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="usmarc">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="marc">
+    <xslt stylesheet="identity.xsl" />
+  </retrieve>
+  <retrieve name="zebra">
+    <xslt stylesheet="zebra.xsl"/>
+  </retrieve>
+  <input>
+    <marc inputcharset="utf-8"/>
+  </input>
+</dom>
diff --git a/etc/zebradb/retrieval-info-auth-dom.xml b/etc/zebradb/retrieval-info-auth-dom.xml
new file mode 100644 (file)
index 0000000..3fe4547
--- /dev/null
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<retrievalinfo xmlns="http://indexdata.com/yaz">
+  <retrieval syntax="usmarc" name="F">
+    <backend syntax="xml" name="marc">
+      <marc inputformat="xml" outputformat="marc"
+            inputcharset="utf-8"
+            outputcharset="utf-8"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="usmarc" name="B">
+    <backend syntax="xml" name="marc">
+      <marc inputformat="xml" outputformat="marc"
+            inputcharset="utf-8"
+            outputcharset="utf-8"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="marcxml"
+             identifier="info:srw/schema/1/marcxml-v1.1"/>
+</retrievalinfo>
diff --git a/etc/zebradb/retrieval-info-auth-grs1.xml b/etc/zebradb/retrieval-info-auth-grs1.xml
new file mode 100644 (file)
index 0000000..58f319d
--- /dev/null
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<retrievalinfo xmlns="http://indexdata.com/yaz">
+  <retrieval syntax="usmarc" name="F"/>
+  <retrieval syntax="usmarc" name="B"/>
+  <retrieval syntax="xml" name="marcxml"
+             identifier="info:srw/schema/1/marcxml-v1.1">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="dc">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2DC.xsl"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="mods">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2MODS.xsl"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="rdfdc">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slim2RDFDC.xsl"/>
+    </backend>
+  </retrieval>
+  <retrieval syntax="xml" name="utils">
+    <backend syntax="usmarc" name="F">
+      <marc inputformat="marc" outputformat="marcxml"
+            inputcharset="utf-8"/>
+      <xslt stylesheet="__INTRANET_TMPL_DIR__/prog/en/xslt/MARC21slimUtils.xsl"/>
+    </backend>
+  </retrieval>
+</retrievalinfo>
diff --git a/etc/zebradb/xsl/identity.xsl b/etc/zebradb/xsl/identity.xsl
new file mode 100644 (file)
index 0000000..21d6d6b
--- /dev/null
@@ -0,0 +1,16 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+<!-- Identity transform stylesheet -->
+
+<xsl:output indent="yes"
+      method="xml"
+      version="1.0"
+      encoding="UTF-8"/>
+
+ <xsl:template match="node()|@*">
+   <xsl:copy>
+     <xsl:apply-templates select="@*|node()"/>
+   </xsl:copy>
+ </xsl:template>
+
+</xsl:stylesheet>
diff --git a/etc/zebradb/xsl/zebra.xsl b/etc/zebradb/xsl/zebra.xsl
new file mode 100644 (file)
index 0000000..f0745e8
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!-- 
+$Id: zebra.xsl,v 1.2 2007-12-13 17:42:28 adam Exp $
+   Copyright (C) 1995-2006
+   Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+-->
+
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+                xmlns:z="http://indexdata.com/zebra-2.0" 
+                version="1.0">
+
+  <xsl:param name="id" select="''"/>
+  <xsl:param name="filename" select="''"/>
+  <xsl:param name="rank" select="''"/>
+  <xsl:param name="score" select="''"/>
+  <xsl:param name="schema" select="''"/>
+  <xsl:param name="size" select="''"/>
+
+  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>
+
+  <!-- match on any record -->
+  <xsl:template match="/">
+    <z:info z:id="{$id}"
+            z:filename="{$filename}"
+            z:rank="{$rank}"
+            z:score="{$score}"
+            z:schema="{$schema}"
+            z:size="{$size}"/>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/etc/zebradb/zebra-authorities-dom.cfg b/etc/zebradb/zebra-authorities-dom.cfg
new file mode 100644 (file)
index 0000000..ff56653
--- /dev/null
@@ -0,0 +1,48 @@
+
+# Simple Zebra configuration file that defines
+# a database with MARCXML records.
+# $Id: zebra.cfg,v 1.1.2.2 2006/05/09 12:03:16 rangi Exp $
+#
+# Where are the config files located?
+profilePath:__ZEBRA_CONF_DIR__/authorities/etc:__ZEBRA_CONF_DIR__/etc:__ZEBRA_CONF_DIR__/marc_defs/__ZEBRA_MARC_FORMAT__/authorities:__ZEBRA_CONF_DIR__/lang_defs/__ZEBRA_LANGUAGE__:__ZEBRA_CONF_DIR__/xsl
+
+encoding: UTF-8
+# modulePath - where to look for loadable zebra modules
+modulePath: /usr/lib/idzebra-2.0/modules
+
+# Files that describe the attribute sets supported.
+attset: bib1.att
+attset: explain.att
+attset: gils.att
+
+#systag sysno rank
+
+# Specify record type
+# group   .recordType[  .name  ]:  type
+# type is split into fundamental type. file-read-type . argument
+# http://www.indexdata.dk/zebra/doc/zebra-cfg.tkl
+# http://www.indexdata.dk/zebra/doc/grs.tkl
+recordtype: dom.__ZEBRA_CONF_DIR__/authorities/etc/dom-config.xml
+
+recordId: (bib1,Local-Number)  
+# was auth1
+storeKeys:1
+storeData:1
+
+# Lock File Area
+lockDir: __ZEBRA_LOCK_DIR__/authorities
+perm.anonymous:r
+perm.__ZEBRA_USER__:rw
+register: __ZEBRA_DATA_DIR__/authorities/register:4G
+shadow: __ZEBRA_DATA_DIR__/authorities/shadow:1G 
+
+# Temp File area for result sets
+setTmpDir: __ZEBRA_DATA_DIR__/authorities/tmp
+
+# Temp File area for index program
+keyTmpDir: __ZEBRA_DATA_DIR__/authorities/key
+
+# Approx. Memory usage during indexing
+memMax: 50M
+rank:rank-1
+
index a55ed1f..dbee4da 100644 (file)
@@ -109,6 +109,8 @@ $prefix = $ENV{'INSTALL_BASE'} || "/usr";
   '__ZEBRA_RUN_DIR__' => "$prefix/var/run/zebradb",
   '__ZEBRA_MARC_FORMAT__' => 'marc21',
   '__ZEBRA_LANGUAGE__' => 'en',
+  '__ZEBRA_AUTH_CFG__' => 'zebra-authorities.cfg',
+  '__AUTH_RETRIEVAL_CFG__' => 'retrieval-info-auth-grs1.xml',
 );
 
 # Override configuration from the environment