1 package Koha::SearchEngine::Elasticsearch::Indexer;
3 # Copyright 2013 Catalyst IT
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 3 of the License, or (at your option) any later
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 use List::Util qw(any);
24 use base qw(Koha::SearchEngine::Elasticsearch);
27 # For now just marc, but we can do anything here really
28 use Catmandu::Importer::MARC;
29 use Catmandu::Store::ElasticSearch;
34 Koha::SearchEngine::Elasticsearch::Indexer->mk_accessors(qw( store ));
# Declares a `store` accessor on this class. delete_index checks it and, when
# it is empty, builds a Catmandu::Store::ElasticSearch instance.
# NOTE(review): mk_accessors is presumably inherited through the base class
# (Class::Accessor style) -- confirm.
38 Koha::SearchEngine::Elasticsearch::Indexer - handles adding new records to the index
42 my $indexer = Koha::SearchEngine::Elasticsearch::Indexer->new(
43 { index => Koha::SearchEngine::BIBLIOS_INDEX } );
44 $indexer->drop_index();
45 $indexer->update_index(\@biblionumbers, \@records);
52 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_OK>
54 Represents an index state where index is created and in a working state.
56 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_REINDEX_REQUIRED>
58 Not currently used, but could be useful later, for example if we can detect when a new field or mapping has been added.
60 =item C<Koha::SearchEngine::Elasticsearch::Indexer::INDEX_STATUS_RECREATE_REQUIRED>
62 Represents an index state where the index needs to be recreated and is not in a working state.
70 INDEX_STATUS_REINDEX_REQUIRED => 1,
# The entry before this comment (value 1) is the "reindex required" state,
# which the POD describes as not currently used. The entry after it (value 2)
# is the "recreate required" state; drop_index sets it after deleting the
# index and update_mappings sets it when a mapping update fails.
# NOTE(review): the rest of the constant list is not visible in this view.
71 INDEX_STATUS_RECREATE_REQUIRED => 2,
76 =head2 update_index($biblionums, $records)
79 $self->update_index($biblionums, $records);
81 die("Something went wrong trying to update index:" . $_[0]);
84 Converts C<MARC::Records> C<$records> to Elasticsearch documents and performs
85 an update request for these records on the Elasticsearch index.
87 The values in the arrays must match up, and the 999$c value in the MARC record
88 will be rewritten using the values in C<$biblionums> to ensure they are correct.
89 If C<$biblionums> is C<undef>, this won't happen, so in that case you should make
90 sure that 999$c is correct.
92 Note that this will modify the original record if C<$biblionums> is supplied.
93 If that's a problem, clone them first.
99 Arrayref of biblio numbers for the C<$records>, the order must be the same as
100 and match up with C<$records>.
104 Arrayref of C<MARC::Record>s.
111 my ($self, $biblionums, $records) = @_;
# Body of update_index: normalise the records, convert them to Elasticsearch
# documents, then submit them with a bulk request.
# _sanitise_records rewrites the 999 field of every record in place so that
# 999$c and 999$d carry the matching entry of $biblionums.
114 $self->_sanitise_records($biblionums, $records);
117 my $conf = $self->get_elasticsearch_params();
118 my $elasticsearch = $self->get_elasticsearch();
119 my $documents = $self->marc_records_to_documents($records);
# Each element of $documents is unpacked as an [ id, document ] pair, and the
# document is appended to @body.
# NOTE(review): @body is declared on a line not visible here, and $id is not
# used in the visible lines -- presumably a bulk action entry carrying the id
# is pushed before the document; confirm.
122 foreach my $document_info (@{$documents}) {
123 my ($id, $document) = @{$document_info};
129 push @body, $document;
# Bulk request against the configured index; the document type is fixed to
# the literal 'data'.
132 my $response = $elasticsearch->bulk(
133 index => $conf->{index_name},
134 type => 'data', # is just hard coded in Indexer.pm?
# $response is not inspected in the visible lines.
138 # TODO: handle response
142 =head2 set_index_status_ok
144 Convenience method for setting index status to C<INDEX_STATUS_OK>.
148 sub set_index_status_ok {
# Thin wrapper: hands INDEX_STATUS_OK to index_status, which validates the
# value and stores it through C4::Context->set_preference.
150 $self->index_status(INDEX_STATUS_OK);
153 =head2 is_index_status_ok
155 Convenience method for checking if index status is C<INDEX_STATUS_OK>.
159 sub is_index_status_ok {
# Reads the stored status through index_status (no argument) and compares it
# numerically with INDEX_STATUS_OK; returns the result of that comparison.
# NOTE(review): if no status has been stored yet and INDEX_STATUS_OK is 0
# (its value is not visible in this view), an undefined preference would
# compare equal here -- confirm that this is intended.
161 return $self->index_status == INDEX_STATUS_OK;
164 =head2 set_index_status_reindex_required
166 Convenience method for setting index status to C<INDEX_STATUS_REINDEX_REQUIRED>.
170 sub set_index_status_reindex_required {
# Thin wrapper: hands INDEX_STATUS_REINDEX_REQUIRED (1) to index_status, which
# validates the value and stores it through C4::Context->set_preference.
172 $self->index_status(INDEX_STATUS_REINDEX_REQUIRED);
175 =head2 is_index_status_reindex_required
177 Convenience method for checking if index status is C<INDEX_STATUS_REINDEX_REQUIRED>.
181 sub is_index_status_reindex_required {
# Reads the stored status through index_status (no argument) and compares it
# numerically with INDEX_STATUS_REINDEX_REQUIRED (1).
183 return $self->index_status == INDEX_STATUS_REINDEX_REQUIRED;
186 =head2 set_index_status_recreate_required
188 Convenience method for setting index status to C<INDEX_STATUS_RECREATE_REQUIRED>.
192 sub set_index_status_recreate_required {
# Thin wrapper: hands INDEX_STATUS_RECREATE_REQUIRED (2) to index_status,
# which validates the value and stores it through
# C4::Context->set_preference.
194 $self->index_status(INDEX_STATUS_RECREATE_REQUIRED);
197 =head2 is_index_status_recreate_required
199 Convenience method for checking if index status is C<INDEX_STATUS_RECREATE_REQUIRED>.
203 sub is_index_status_recreate_required {
# Reads the stored status through index_status (no argument) and compares it
# numerically with INDEX_STATUS_RECREATE_REQUIRED (2).
205 return $self->index_status == INDEX_STATUS_RECREATE_REQUIRED;
208 =head2 index_status($status)
210 Will either set the current index status to C<$status> and return C<$status>,
211 or return the current index status if called with no arguments.
217 Optional argument. If passed will set current index status to C<$status> if C<$status> is
218 a valid status. See L</CONSTANTS>.
225 my ($self, $status) = @_;
# Combined getter/setter for the index status. The status is kept per index:
# the preference name is 'ElasticsearchIndexStatus_' followed by $self->index.
226 my $key = 'ElasticsearchIndexStatus_' . $self->index;
# Setter path, taken only when a defined $status was passed.
228 if (defined $status) {
# Reject any value that is not numerically equal (==) to one of the listed
# status constants.
# NOTE(review): the list of accepted constants is only partly visible here.
229 unless (any { $status == $_ } (
231 INDEX_STATUS_REINDEX_REQUIRED,
232 INDEX_STATUS_RECREATE_REQUIRED,
235 Koha::Exceptions::Exception->throw("Invalid index status: $status");
# Store the validated status under the per-index key.
237 C4::Context->set_preference($key, $status);
# Getter path: return whatever is currently stored under the key.
241 return C4::Context->preference($key);
245 =head2 update_mappings
247 Generate Elasticsearch mappings from mappings stored in database and
248 perform a request to update Elasticsearch index mappings. Will throw an
249 error and set index status to C<INDEX_STATUS_RECREATE_REQUIRED> if update
254 sub update_mappings {
# Sends the mappings returned by get_elasticsearch_mappings to the configured
# index, one put_mapping request per top-level key ($type) of the mappings.
256 my $conf = $self->get_elasticsearch_params();
257 my $elasticsearch = $self->get_elasticsearch();
258 my $mappings = $self->get_elasticsearch_mappings();
260 foreach my $type (keys %{$mappings}) {
# NOTE(review): some arguments of this call, and the error-handling construct
# around it, are on lines not visible in this view.
262 my $response = $elasticsearch->indices->put_mapping(
263 index => $conf->{index_name},
266 $type => $mappings->{$type}
# Failure handling: mark the index as needing recreation, then throw an
# exception carrying the reason read from the error's
# vars -> body -> error -> reason.
# NOTE(review): $_[0] is presumably the caught error handed to a Try::Tiny
# catch block -- confirm.
270 $self->set_index_status_recreate_required();
271 my $reason = $_[0]->{vars}->{body}->{error}->{reason};
272 Koha::Exceptions::Exception->throw(
273 error => "Unable to update mappings for index \"$conf->{index_name}\". Reason was: \"$reason\". Index needs to be recreated and reindexed",
# Presumably reached only when no mapping update threw: mark the index OK.
277 $self->set_index_status_ok();
280 =head2 update_index_background($biblionums, $records)
282 This has exactly the same API as C<update_index> however it'll
283 return immediately. It'll start a background process that does the adding.
285 If it fails to add to Elasticsearch then it'll add to a queue that will cause
286 it to be updated by a regular index cron job in the future.
290 # TODO implement in the future - I don't know the best way of doing this yet.
291 # If fork: make sure process group is changed so apache doesn't wait for us.
293 sub update_index_background {
# Not asynchronous yet: simply forwards its arguments to update_index.
# NOTE(review): $self is set on a line not visible here. Presumably it is
# shifted off @_, so that @_ holds only ($biblionums, $records) -- confirm.
295 $self->update_index(@_);
298 =head2 delete_index($biblionums)
300 C<$biblionums> is an arrayref of biblionumbers to delete from the index.
305 my ($self, $biblionums) = @_;
# Body of delete_index. The Catmandu store is built lazily, only when the
# `store` accessor is still empty.
307 if ( !$self->store ) {
308 my $params = $self->get_elasticsearch_params();
# The store is constructed with the index settings and mappings of this
# object.
# NOTE(review): the other constructor arguments, and the line that assigns
# the new object to the accessor, are not visible in this view.
310 Catmandu::Store::ElasticSearch->new(
312 index_settings => $self->get_elasticsearch_settings(),
313 index_mappings => $self->get_elasticsearch_mappings(),
# Call delete on the store's bag once per biblionumber, then commit once at
# the end.
317 $self->store->bag->delete($_) foreach @$biblionums;
318 $self->store->bag->commit;
321 =head2 delete_index_background($biblionums)
323 Identical to L</delete_index($biblionums)>
327 # TODO: Should be made async
328 sub delete_index_background {
# Not asynchronous yet: simply forwards its arguments to delete_index.
# NOTE(review): $self is set on a line not visible here. Presumably it is
# shifted off @_, so that @_ holds only ($biblionums) -- confirm.
330 $self->delete_index(@_);
335 Drops the index from the Elasticsearch server.
341 if ($self->index_exists) {
# Body of drop_index: nothing is done when the index does not exist.
342 my $conf = $self->get_elasticsearch_params();
343 my $elasticsearch = $self->get_elasticsearch();
# Delete the configured index, then record that it has to be recreated
# before it can be used again.
344 $elasticsearch->indices->delete(index => $conf->{index_name});
345 $self->set_index_status_recreate_required();
351 Creates the index (including mappings) on the Elasticsearch server.
357 my $conf = $self->get_elasticsearch_params();
# Body of create_index: create the configured index with the settings from
# get_elasticsearch_settings, then push the mappings.
358 my $settings = $self->get_elasticsearch_settings();
359 my $elasticsearch = $self->get_elasticsearch();
# NOTE(review): the lines wrapping `settings` in this call are not visible
# here; presumably it is nested in a request body -- confirm.
360 $elasticsearch->indices->create(
361 index => $conf->{index_name},
363 settings => $settings
# update_mappings also sets the index status: set_index_status_ok at its end,
# set_index_status_recreate_required in its failure handling.
366 $self->update_mappings();
371 Checks if index has been created on the Elasticsearch server. Returns C<1> or the
372 empty string to indicate whether index exists or not.
378 my $conf = $self->get_elasticsearch_params();
# Body of index_exists: asks the Elasticsearch indices API whether the
# configured index exists and returns that answer unchanged.
379 my $elasticsearch = $self->get_elasticsearch();
380 return $elasticsearch->indices->exists(
381 index => $conf->{index_name},
385 sub _sanitise_records {
# Rewrites the 999 field of each record so that it carries the biblionumber
# found at the same position in $biblionums. The records are modified in
# place.
386 my ($self, $biblionums, $records) = @_;
# The two arrayrefs are paired by position, so a length mismatch is fatal.
388 confess "Unequal number of values in \$biblionums and \$records." if (@$biblionums != @$records);
390 my $c = @$biblionums;
391 for (my $i=0; $i<$c; $i++) {
392 my $bibnum = $biblionums->[$i];
393 my $rec = $records->[$i];
394 # I've seen things you people wouldn't believe. Attack ships on fire
395 # off the shoulder of Orion. I watched C-beams glitter in the dark near
396 # the Tannhauser gate. MARC records where 999$c doesn't match the
397 # biblionumber column. All those moments will be lost in time... like
# NOTE(review): lines are missing from this view between the comment above
# and the call below; a guard on $rec may live there -- confirm.
# Remove whatever $rec->field('999') returns, then append a fresh 999 field
# whose $c and $d subfields are both the biblionumber.
400 $rec->delete_fields($rec->field('999'));
401 # Make sure biblionumber is a string. Elasticsearch would consider int and string different IDs.
402 $rec->append_fields(MARC::Field->new('999','','','c' => "" . $bibnum, 'd' => "" . $bibnum));
415 =item Chris Cormack C<< <chrisc@catalyst.net.nz> >>
417 =item Robin Sheat C<< <robin@catalyst.net.nz> >>