3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
# OAI-PMH endpoint script: checks that the service is enabled, chooses between
# gzip-compressed and plain XML output, then instantiates C4::OAI::Repository.
24 use CGI qw( :standard -oldstyle_urls -utf8 );
# PerlIO::gzip is optional; the eval keeps a failed require from aborting the script.
30 eval { require PerlIO::gzip };
# When the 'OAI-PMH' system preference is false, the arguments below describe a
# text/plain response with a 404 status and a short explanatory message.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# gzip is selected only when $GZIP is true and one of the values returned by
# http('HTTP_ACCEPT_ENCODING') is exactly the string 'gzip'.
# NOTE(review): if http() returns the raw header as a single string such as
# 'gzip, deflate', this exact comparison never matches -- confirm.
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
# Apply the :gzip layer to STDOUT for the compressed response.
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
# Apply the UTF-8 encoding layer to STDOUT, then build the repository object.
# NOTE(review): the request appears to be handled inside the constructor -- confirm.
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
# Arguments read from %args: either resumptionToken, or the individual
# metadataPrefix / from / until / offset values.
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
# An incoming token is split on '/' into prefix, offset, from, until and set.
# NOTE(review): the split fields are used as-is; no validation of $offset or
# of the dates is visible here.
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
# Values taken from the individual request arguments; a false 'from' falls
# back to '1970-01-01'.
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
# Today's date in UTC (gmtime), formatted as YYYY-MM-DD, becomes $until.
# NOTE(review): presumably guarded by a check that 'until' was not supplied -- confirm.
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
103 $from .= 'T00:00:00Z' if length($from) == 10;
104 $until .= 'T23:59:59Z' if length($until) == 10;
105 $offset = $args{ offset } || 0;
# Store the fields on the object. from_arg / until_arg hold the same timestamps
# with every 'T' replaced by a space and every 'Z' removed.
109 $self->{ metadata_prefix } = $metadata_prefix;
110 $self->{ offset } = $offset;
111 $self->{ from } = $from;
112 $self->{ until } = $until;
113 $self->{ set } = $set;
114 $self->{ from_arg } = _strip_UTC_designators($from);
115 $self->{ until_arg } = _strip_UTC_designators($until);
# The serialized token is the five fields joined with '/'; the cursor is set
# to the offset.
117 $self->resumptionToken(
118 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
119 $self->cursor( $offset );
# Helper: replaces every 'T' with a space and deletes every 'Z' in the given
# timestamp string.
124 sub _strip_UTC_designators {
125 my ( $timestamp ) = @_;
126 $timestamp =~ s/T/ /g;
127 $timestamp =~ s/Z//g;
131 # __END__ C4::OAI::ResumptionToken
# Identify response: an HTTP::OAI::Identify subclass whose fields are filled
# from Koha system preferences.
135 package C4::OAI::Identify;
142 use base ("HTTP::OAI::Identify");
# Arguments: the class and the C4::OAI::Repository object.
145 my ($class, $repository) = @_;
# Greedy capture of everything before the last '?' in the request URL;
# $baseURL is undef when the URL contains no '?'.
147 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
# Repository name, admin e-mail, MaxCount and the deleted-record policy come
# from system preferences; the policy falls back to 'no' when the preference
# is false.
148 my $self = $class->SUPER::new(
150 repositoryName => C4::Context->preference("LibraryName"),
151 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
152 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
153 granularity => 'YYYY-MM-DD',
154 earliestDatestamp => '0001-01-01',
155 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
158 # FIXME - alas, the description element is not so simple; to validate
159 # against the OAI-PMH schema, it cannot contain just a string,
160 # but one or more elements that validate against another XML schema.
161 # For now, simply omitting it.
162 # $self->description( "Koha OAI Repository" );
# Declare 'gzip' as a compression value on the Identify response.
164 $self->compression( 'gzip' );
169 # __END__ C4::OAI::Identify
# ListMetadataFormats response: advertises the metadata formats this
# repository can disseminate.
173 package C4::OAI::ListMetadataFormats;
179 use base ("HTTP::OAI::ListMetadataFormats");
# Arguments: the class and the C4::OAI::Repository object.
182 my ($class, $repository) = @_;
184 my $self = $class->SUPER::new();
# With a loaded configuration ($repository->{conf}), every format named in
# koha_metadata_format is advertised using the prefix, schema and namespace
# found in the configuration.
186 if ( $repository->{ conf } ) {
187 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
188 my $format = $repository->{ conf }->{ format }->{ $name };
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => $format->{metadataPrefix},
191 schema => $format->{schema},
192 metadataNamespace => $format->{metadataNamespace}, ) );
# Built-in formats: oai_dc and marcxml.
196 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
197 metadataPrefix => 'oai_dc',
198 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
199 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
# The marcxml URLs previously contained a stray space after "www.loc.gov/",
# which made them invalid; they now match the URLs shown in this file's
# configuration example.
# NOTE(review): the OAI-PMH specification describes schema as a single schema
# URL and metadataNamespace as a single namespace URI; these values hold a
# "namespace location" pair -- confirm whether harvesters rely on that.
201 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
202 metadataPrefix => 'marcxml',
203 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd',
204 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim'
211 # __END__ C4::OAI::ListMetadataFormats
# A single OAI record (header plus metadata) built from a MARCXML string.
215 package C4::OAI::Record;
220 use HTTP::OAI::Metadata::OAI_DC;
222 use base ("HTTP::OAI::Record");
# Arguments: class, repository object, MARCXML string, timestamp, array ref of
# set specs, then named arguments (identifier and metadataPrefix are read below).
225 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
227 my $self = $class->SUPER::new(%args);
# Replace the first space of the timestamp with 'T' and append 'Z'.
229 $timestamp =~ s/ /T/, $timestamp .= 'Z';
230 $self->header( new HTTP::OAI::Header(
231 identifier => $args{identifier},
232 datestamp => $timestamp,
# Attach every set spec to the record header.
235 foreach my $setSpec (@$setSpecs) {
236 $self->header->setSpec($setSpec);
# Parse the MARCXML into a DOM.
239 my $parser = XML::LibXML->new();
240 my $record_dom = $parser->parse_string( $marcxml );
241 my $format = $args{metadataPrefix};
# Any prefix other than 'marcxml' is produced by running the DOM through the
# stylesheet returned by $repository->stylesheet($format). The OPACBaseURL
# preference is passed wrapped in single quotes.
# NOTE(review): presumably quoted because it is an XSLT string parameter -- confirm.
242 if ( $format ne 'marcxml' ) {
244 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
246 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
# The (possibly transformed) DOM becomes the record's metadata.
248 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
253 # __END__ C4::OAI::Record
# An OAI record for a deleted biblio: only a header is built here, no metadata.
255 package C4::OAI::DeletedRecord;
259 use HTTP::OAI::Metadata::OAI_DC;
261 use base ("HTTP::OAI::Record");
# Arguments: class, timestamp, array ref of set specs, then named arguments
# (identifier is read below).
264 my ($class, $timestamp, $setSpecs, %args) = @_;
266 my $self = $class->SUPER::new(%args);
# Replace the first space of the timestamp with 'T' and append 'Z'.
268 $timestamp =~ s/ /T/, $timestamp .= 'Z';
# NOTE(review): a deleted-status attribute is presumably also passed to the
# header constructor -- not visible here, confirm.
269 $self->header( new HTTP::OAI::Header(
271 identifier => $args{identifier},
272 datestamp => $timestamp,
# Attach every set spec to the record header.
275 foreach my $setSpec (@$setSpecs) {
276 $self->header->setSpec($setSpec);
282 # __END__ C4::OAI::DeletedRecord
# GetRecord response: returns the single record named by the 'identifier'
# request argument, or an idDoesNotExist error response.
286 package C4::OAI::GetRecord;
295 use base ("HTTP::OAI::GetRecord");
# Arguments: the class, the C4::OAI::Repository object, and the request attributes.
299 my ($class, $repository, %args) = @_;
301 my $self = HTTP::OAI::GetRecord->new(%args);
303 my $dbh = C4::Context->dbh;
304 my $sth = $dbh->prepare("
307 WHERE biblionumber=? " );
# Identifiers have the form "<koha_identifier>:<biblionumber>". The prefix is
# quoted with \Q...\E so that regex metacharacters in the configured archive ID
# (for example '.' or '+') are matched literally.
308 my $prefix = $repository->{koha_identifier} . ':';
309 my ($biblionumber) = $args{identifier} =~ /^\Q$prefix\E(.*)/;
310 $sth->execute( $biblionumber );
# The prepared statement is tried first; when it yields no row a second query
# is tried, and when that also yields nothing an idDoesNotExist error response
# is returned.
312 unless ( ($timestamp) = $sth->fetchrow ) {
313 unless ( ($timestamp) = $dbh->selectrow_array(q/
316 WHERE biblionumber=? /, undef, $biblionumber ))
318 return HTTP::OAI::Response->new(
319 requestURL => $repository->self_url(),
320 errors => [ HTTP::OAI::Error->new(
321 code => 'idDoesNotExist',
322 message => "There is no biblio record with this identifier",
331 # We fetch it using this method, rather than the database directly,
332 # so it'll include the item data
335 my $record = GetMarcBiblio($biblionumber, 1);
336 $marcxml = $record->as_xml();
# Collect the specs of every OAI set this biblio belongs to.
338 my $oai_sets = GetOAISetsBiblio($biblionumber);
340 foreach (@$oai_sets) {
341 push @setSpecs, $_->{spec};
344 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
347 ? C4::OAI::DeletedRecord->new($timestamp, \@setSpecs, %args)
348 : C4::OAI::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args);
353 # __END__ C4::OAI::GetRecord
# ListIdentifiers response: lists the headers (identifier and datestamp) of
# biblio records and deleted biblio records whose timestamp lies in the
# requested range, optionally restricted to one OAI set.
357 package C4::OAI::ListIdentifiers;
364 use base ("HTTP::OAI::ListIdentifiers");
# Arguments: the class, the C4::OAI::Repository object, and the request attributes.
368 my ($class, $repository, %args) = @_;
370 my $self = HTTP::OAI::ListIdentifiers->new(%args);
372 my $token = C4::OAI::ResumptionToken->new( %args );
373 my $dbh = C4::Context->dbh;
# Resolve the set spec carried by the token, if any.
375 if(defined $token->{'set'}) {
376 $set = GetOAISetBySpec($token->{'set'});
# Page size. The query asks for one row more than this so the fetch loop can
# tell whether another page exists.
378 my $max = $repository->{koha_max_count};
380 (SELECT biblioitems.biblionumber, biblioitems.timestamp
383 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
384 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
385 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
387 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
388 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
389 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
390 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
# The offset can originate from the request's resumption token, so only a
# plain non-negative integer is ever interpolated into the SQL text; any other
# value falls back to 0.
392 $sql .= ") ORDER BY biblionumber
393 LIMIT " . ($max+1) . "
394 OFFSET " . ( $token->{offset} =~ /\A(\d+)\z/ ? $1 : 0 ) . "
# Placeholders are bound in SQL order: date range (plus set id) for the live
# records, then date range (plus set id) for the deleted records.
396 my $sth = $dbh->prepare( $sql );
397 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
398 push @bind_params, $set->{'id'} if defined $set;
399 push @bind_params, ($token->{'from'}, $token->{'until'});
400 push @bind_params, $set->{'id'} if defined $set;
401 $sth->execute( @bind_params );
# Once more than $max rows have been seen, a resumption token whose offset is
# advanced by $max is attached to the response.
404 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
406 if ( $count > $max ) {
407 $self->resumptionToken(
408 C4::OAI::ResumptionToken->new(
409 metadataPrefix => $token->{metadata_prefix},
410 from => $token->{from},
411 until => $token->{until},
412 offset => $token->{offset} + $max,
# Replace the first space of the timestamp with 'T' and append 'Z', then add a
# header for this biblio.
418 $timestamp =~ s/ /T/, $timestamp .= 'Z';
419 $self->identifier( HTTP::OAI::Header->new(
420 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
421 datestamp => $timestamp,
425 # Return error if no results
427 return HTTP::OAI::Response->new(
428 requestURL => $repository->self_url(),
429 errors => [ HTTP::OAI::Error->new( code => 'noRecordsMatch' ) ],
436 # __END__ C4::OAI::ListIdentifiers
# Holds one set description string and emits it as a <setDescription> element
# through a SAX handler.
438 package C4::OAI::Description;
443 use HTTP::OAI::SAXHandler qw/ :SAX /;
# Constructor arguments (named): setDescription and handler; each is stored on
# the object only when it is true.
446 my ( $class, %args ) = @_;
450 if(my $setDescription = $args{setDescription}) {
451 $self->{setDescription} = $setDescription;
453 if(my $handler = $args{handler}) {
454 $self->{handler} = $handler;
# Method taking an optional handler: replaces the stored handler when a true
# value is given.
462 my ( $self, $handler ) = @_;
464 $self->{handler} = $handler if $handler;
# Emit the stored description as a 'setDescription' element in the OAI-PMH 2.0
# namespace with no attributes.
472 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
477 # __END__ C4::OAI::Description
# ListSets response: lists the OAI sets returned by GetOAISets, paged through
# a resumption token.
479 package C4::OAI::ListSets;
486 use base ("HTTP::OAI::ListSets");
# Arguments: the class, the C4::OAI::Repository object, and the request attributes.
489 my ( $class, $repository, %args ) = @_;
491 my $self = HTTP::OAI::ListSets->new(%args);
493 my $token = C4::OAI::ResumptionToken->new(%args);
494 my $sets = GetOAISets;
# Walk the sets; positions below the token's offset get separate handling
# (presumably skipped -- the body of that branch is not visible here).
496 foreach my $set (@$sets) {
497 if ($pos < $token->{offset}) {
# Wrap every description of the set in a C4::OAI::Description object.
502 foreach my $desc (@{$set->{'descriptions'}}) {
503 push @descriptions, C4::OAI::Description->new(
504 setDescription => $desc,
509 setSpec => $set->{'spec'},
510 setName => $set->{'name'},
511 setDescription => \@descriptions,
# Stop once the position is more than koha_max_count entries past the offset.
515 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# A resumption token is attached whenever the position has moved past the offset.
# NOTE(review): this condition does not visibly distinguish "more sets remain"
# from "list complete" -- confirm.
518 $self->resumptionToken(
519 new C4::OAI::ResumptionToken(
520 metadataPrefix => $token->{metadata_prefix},
523 ) if ( $pos > $token->{offset} );
528 # __END__ C4::OAI::ListSets;
# ListRecords response: returns full records for biblios and deleted-record
# entries for deleted biblios whose timestamp lies in the requested range,
# optionally restricted to one OAI set.
530 package C4::OAI::ListRecords;
539 use base ("HTTP::OAI::ListRecords");
# Arguments: the class, the C4::OAI::Repository object, and the request attributes.
543 my ($class, $repository, %args) = @_;
545 my $self = HTTP::OAI::ListRecords->new(%args);
547 my $token = C4::OAI::ResumptionToken->new( %args );
548 my $dbh = C4::Context->dbh;
# Resolve the set spec carried by the token, if any.
550 if(defined $token->{'set'}) {
551 $set = GetOAISetBySpec($token->{'set'});
# Page size. The query asks for one row more than this so the fetch loop can
# tell whether another page exists.
553 my $max = $repository->{koha_max_count};
555 SELECT biblioitems.biblionumber, biblioitems.timestamp
558 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
559 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
560 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
562 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
563 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
564 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
565 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
# The offset can originate from the request's resumption token, so only a
# plain non-negative integer is ever interpolated into the SQL text; any other
# value falls back to 0.
567 $sql .= ") ORDER BY biblionumber
568 LIMIT " . ($max + 1) . "
569 OFFSET " . ( $token->{offset} =~ /\A(\d+)\z/ ? $1 : 0 ) . "
# Placeholders are bound in SQL order: date range (plus set id) for the live
# records, then date range (plus set id) for the deleted records.
571 my $sth = $dbh->prepare( $sql );
572 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
573 push @bind_params, $set->{'id'} if defined $set;
574 push @bind_params, ($token->{'from'}, $token->{'until'});
575 push @bind_params, $set->{'id'} if defined $set;
576 $sth->execute( @bind_params );
# Once more than $max rows have been seen, a resumption token whose offset is
# advanced by $max is attached to the response.
# NOTE(review): the deleted-records SELECT visible above yields three columns
# (biblionumber, marcxml, timestamp) while only two are fetched here -- confirm
# that the column lists of both SELECTs agree with this fetch.
579 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
581 if ( $count > $max ) {
582 $self->resumptionToken(
583 C4::OAI::ResumptionToken->new(
584 metadataPrefix => $token->{metadata_prefix},
585 from => $token->{from},
586 until => $token->{until},
587 offset => $token->{offset} + $max,
# Load the MARC record through GetMarcBiblio, serialize it as XML, and collect
# the specs of the OAI sets this biblio belongs to.
593 my $record = GetMarcBiblio($biblionumber, 1, 1);
594 my $marcxml = $record->as_xml();
595 my $oai_sets = GetOAISetsBiblio($biblionumber);
597 foreach (@$oai_sets) {
598 push @setSpecs, $_->{spec};
601 $self->record( C4::OAI::Record->new(
602 $repository, $marcxml, $timestamp, \@setSpecs,
603 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
604 metadataPrefix => $token->{metadata_prefix}
607 $self->record( C4::OAI::DeletedRecord->new(
608 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
612 # Return error if no results
614 return HTTP::OAI::Response->new(
615 requestURL => $repository->self_url(),
616 errors => [ HTTP::OAI::Error->new( code => 'noRecordsMatch' ) ],
623 # __END__ C4::OAI::ListRecords
# OAI-PMH repository: validates the incoming CGI request, dispatches on the
# OAI verb to the matching response class, and provides cached XSLT
# stylesheets per metadata format.
627 package C4::OAI::Repository;
629 use base ("HTTP::OAI::Repository");
635 use HTTP::OAI::Repository qw/:validate/;
637 use XML::SAX::Writer;
640 use YAML::Syck qw( LoadFile );
641 use CGI qw/:standard -oldstyle_urls/;
# Constructor. Default settings come from system preferences; the default
# metadata formats are oai_dc and marcxml.
648 my ($class, %args) = @_;
649 my $self = $class->SUPER::new(%args);
651 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
652 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
653 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
654 $self->{ koha_stylesheet } = { }; # Build when needed
656 # Load configuration file if defined in OAI-PMH:ConfFile syspref
# When a file is configured, the format list is replaced by the keys of the
# file's 'format' section.
657 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
658 $self->{ conf } = LoadFile( $file );
659 my @formats = keys %{ $self->{conf}->{format} };
660 $self->{ koha_metadata_format } = \@formats;
663 # Check for grammatical errors in the request
664 my @errs = validate_request( CGI::Vars() );
666 # Is metadataPrefix supported by the repository?
667 my $mdp = param('metadataPrefix') || '';
668 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
669 push @errs, new HTTP::OAI::Error(
670 code => 'cannotDisseminateFormat',
671 message => "Dissemination as '$mdp' is not supported",
677 $response = HTTP::OAI::Response->new(
678 requestURL => self_url(),
# Dispatch on the verb; the remaining request attributes are handed to the
# response class for that verb.
683 my %attr = CGI::Vars();
684 my $verb = delete( $attr{verb} );
685 if ( $verb eq 'ListSets' ) {
686 $response = C4::OAI::ListSets->new($self, %attr);
688 elsif ( $verb eq 'Identify' ) {
689 $response = C4::OAI::Identify->new( $self );
691 elsif ( $verb eq 'ListMetadataFormats' ) {
692 $response = C4::OAI::ListMetadataFormats->new( $self );
694 elsif ( $verb eq 'GetRecord' ) {
695 $response = C4::OAI::GetRecord->new( $self, %attr );
697 elsif ( $verb eq 'ListRecords' ) {
698 $response = C4::OAI::ListRecords->new( $self, %attr );
700 elsif ( $verb eq 'ListIdentifiers' ) {
701 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
# Attach a SAX writer whose output goes to STDOUT.
705 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
# Stylesheet lookup for a metadata format. A stylesheet already stored in
# $self->{koha_stylesheet} is reused; otherwise the XSL file is parsed,
# compiled and stored there. The file path comes from the configuration when
# one is loaded, else it is built from the 'intrahtdocs' config value and the
# 'marcflavour' preference.
714 my ( $self, $format ) = @_;
716 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
717 unless ( $stylesheet ) {
718 my $xsl_file = $self->{ conf }
719 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
720 : ( C4::Context->config('intrahtdocs') .
722 C4::Context->preference('marcflavour') .
724 my $parser = XML::LibXML->new();
725 my $xslt = XML::LibXSLT->new();
726 my $style_doc = $parser->parse_file( $xsl_file );
727 $stylesheet = $xslt->parse_stylesheet( $style_doc );
728 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
738 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
742 use C4::OAI::Repository;
744 my $repository = C4::OAI::Repository->new();
748 This object extends the HTTP::OAI::Repository object.
749 It accepts OAI-PMH HTTP requests and returns the result.
751 This OAI-PMH server can operate in a simple mode and extended one.
753 In simple mode, repository configuration comes entirely from Koha system
754 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
755 records in marcxml or Dublin Core format. Dublin Core records are created from
756 Koha marcxml records transformed with XSLT. The XSL file used is located in the
757 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour,
758 respectively MARC21slim2OAIDC.xsl for MARC21 and UNIMARCslim2OAIDC.xsl for
761 In extended mode, it is possible to configure formats other than marcxml or Dublin
762 Core. A new syspref, OAI-PMH:ConfFile, specifies a YAML configuration file which
763 lists the available metadata formats and the XSL files used to create them from marcxml
764 records. If this syspref isn't set, the Koha OAI server works in simple mode. A
765 configuration file koha-oai.conf can look like this:
771 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
772 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
773 xsl_file: /usr/local/koha/xslt/vs.xsl
775 metadataPrefix: marcxml
776 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
777 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
779 metadataPrefix: oai_dc
780 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
781 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
782 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl