Revert "Bug 20073: Move Elasticsearch configs to yaml files and improve the default...
authorJonathan Druart <jonathan.druart@bugs.koha-community.org>
Mon, 21 May 2018 14:02:04 +0000 (11:02 -0300)
committerJonathan Druart <jonathan.druart@bugs.koha-community.org>
Mon, 21 May 2018 14:02:16 +0000 (11:02 -0300)
This reverts commit f489d2034bbc3bff67230423a7986d5ea34060ec.

This commit breaks the install process when using debian packages.
Reverting as we are very close to the 18.05.00 release

Signed-off-by: Jonathan Druart <jonathan.druart@bugs.koha-community.org>

Koha/SearchEngine/Elasticsearch.pm
admin/searchengine/elasticsearch/mappings.yaml [moved from etc/searchengine/elasticsearch/mappings.yaml with 99% similarity]
debian/templates/koha-conf-site.xml.in
etc/koha-conf.xml
etc/searchengine/elasticsearch/field_config.yaml [deleted file]
etc/searchengine/elasticsearch/index_config.yaml [deleted file]
installer/data/mysql/kohastructure.sql
koha-tmpl/intranet-tmpl/prog/en/modules/admin/searchengine/elasticsearch/mappings.tt
t/Koha/SearchEngine/Elasticsearch.t

index e90af21..fe2e0ea 100644 (file)
@@ -34,6 +34,8 @@ use Search::Elasticsearch;
 use Try::Tiny;
 use YAML::Syck;
 
+use Data::Dumper;    # TODO remove
+
 __PACKAGE__->mk_ro_accessors(qw( index ));
 __PACKAGE__->mk_accessors(qw( sort_fields ));
 
@@ -135,14 +137,24 @@ A hashref containing the settings is returned.
 sub get_elasticsearch_settings {
     my ($self) = @_;
 
-    # Use state to speed up repeated calls
-    state $settings = undef;
-    if (!defined $settings) {
-        my $config_file = C4::Context->config('elasticsearch_index_config');
-        $config_file ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/index_config.yaml';
-        $settings = LoadFile( $config_file );
-    }
-
+    # Ultimately this should come from a file or something, and not be
+    # hardcoded.
+    my $settings = {
+        index => {
+            analysis => {
+                analyzer => {
+                    analyser_phrase => {
+                        tokenizer => 'icu_tokenizer',
+                        filter    => ['icu_folding'],
+                    },
+                    analyser_standard => {
+                        tokenizer => 'icu_tokenizer',
+                        filter    => ['icu_folding'],
+                    },
+                },
+            }
+        }
+    };
     return $settings;
 }
 
@@ -158,97 +170,116 @@ created.
 sub get_elasticsearch_mappings {
     my ($self) = @_;
 
-    # Use state to speed up repeated calls
-    state %all_mappings;
-    state %sort_fields;
-
-    if (!defined $all_mappings{$self->index}) {
-        $sort_fields{$self->index} = {};
-        my $mappings = {
-            data => _get_elasticsearch_mapping('general', '')
-        };
-        my $marcflavour = lc C4::Context->preference('marcflavour');
-        $self->_foreach_mapping(
-            sub {
-                my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
-                return if $marc_type ne $marcflavour;
-                # TODO if this gets any sort of complexity to it, it should
-                # be broken out into its own function.
-
-                # TODO be aware of date formats, but this requires pre-parsing
-                # as ES will simply reject anything with an invalid date.
-                my $es_type = 'text';
-                if ($type eq 'boolean') {
-                    $es_type = 'boolean';
-                } elsif ($type eq 'number' || $type eq 'sum') {
-                    $es_type = 'integer';
-                } elsif ($type eq 'isbn' || $type eq 'stdno') {
-                    $es_type = 'stdno';
-                }
-
-                $mappings->{data}{properties}{$name} = _get_elasticsearch_mapping('search', $es_type);
-
-                if ($facet) {
-                    $mappings->{data}{properties}{ $name . '__facet' } = _get_elasticsearch_mapping('facet', $es_type);
-                }
-                if ($suggestible) {
-                    $mappings->{data}{properties}{ $name . '__suggestion' } = _get_elasticsearch_mapping('suggestible', $es_type);
-                }
-                # Sort is a bit special as it can be true, false, undef.
-                # We care about "true" or "undef",
-                # "undef" means to do the default thing, which is make it sortable.
-                if (!defined $sort || $sort) {
-                    $mappings->{data}{properties}{ $name . '__sort' } = _get_elasticsearch_mapping('sort', $es_type);
-                    $sort_fields{$self->index}{$name} = 1;
-                }
+    # TODO cache in the object?
+    my $mappings = {
+        data => {
+            _all => {type => "string", analyzer => "analyser_standard"},
+            properties => {
+                record => {
+                    store          => "true",
+                    include_in_all => JSON::false,
+                    type           => "text",
+                },
+            }
+        }
+    };
+    my %sort_fields;
+    my $marcflavour = lc C4::Context->preference('marcflavour');
+    $self->_foreach_mapping(
+        sub {
+            my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
+            return if $marc_type ne $marcflavour;
+            # TODO if this gets any sort of complexity to it, it should
+            # be broken out into its own function.
+
+            # TODO be aware of date formats, but this requires pre-parsing
+            # as ES will simply reject anything with an invalid date.
+            my $es_type =
+              $type eq 'boolean'
+              ? 'boolean'
+              : 'text';
+
+            if ($es_type eq 'boolean') {
+                $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_boolean( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
+                return; #Boolean cannot have facets nor sorting nor suggestions
+            } else {
+                $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_default( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
             }
-        );
-        $all_mappings{$self->index} = $mappings;
-    }
-    $self->sort_fields(\%{$sort_fields{$self->index}});
 
-    return $all_mappings{$self->index};
+            if ($facet) {
+                $mappings->{data}{properties}{ $name . '__facet' } = {
+                    type  => "keyword",
+                };
+            }
+            if ($suggestible) {
+                $mappings->{data}{properties}{ $name . '__suggestion' } = {
+                    type => 'completion',
+                    analyzer => 'simple',
+                    search_analyzer => 'simple',
+                };
+            }
+            # Sort is a bit special as it can be true, false, undef.
+            # We care about "true" or "undef",
+            # "undef" means to do the default thing, which is make it sortable.
+            if ($sort || !defined $sort) {
+                $mappings->{data}{properties}{ $name . '__sort' } = {
+                    search_analyzer => "analyser_phrase",
+                    analyzer  => "analyser_phrase",
+                    type            => "text",
+                    include_in_all  => JSON::false,
+                    fields          => {
+                        phrase => {
+                            type            => "keyword",
+                        },
+                    },
+                };
+                $sort_fields{$name} = 1;
+            }
+        }
+    );
+    $self->sort_fields(\%sort_fields);
+    return $mappings;
 }
 
-=head2 _get_elasticsearch_mapping
+=head2 _elasticsearch_mapping_for_*
 
-Get the ES mappings for the given purpose and data type
+Get the ES mappings for the given data type or a special mapping case
 
-$mapping = _get_elasticsearch_mapping('search', 'text');
+Receives the same parameters from the $self->_foreach_mapping() dispatcher
 
 =cut
 
-sub _get_elasticsearch_mapping {
-
-    my ( $purpose, $type ) = @_;
+sub _elasticsearch_mapping_for_boolean {
+    my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
 
-    # Use state to speed up repeated calls
-    state $settings = undef;
-    if (!defined $settings) {
-        my $config_file = C4::Context->config('elasticsearch_field_config');
-        $config_file ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/field_config.yaml';
-        $settings = LoadFile( $config_file );
-    }
+    return {
+        type            => $type,
+        null_value      => 0,
+    };
+}
 
-    if (!defined $settings->{$purpose}) {
-        die "Field purpose $purpose not defined in field config";
-    }
-    if ($type eq '') {
-        return $settings->{$purpose};
-    }
-    if (defined $settings->{$purpose}{$type}) {
-        return $settings->{$purpose}{$type};
-    }
-    if (defined $settings->{$purpose}{'default'}) {
-        return $settings->{$purpose}{'default'};
-    }
-    return undef;
+sub _elasticsearch_mapping_for_default {
+    my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
+
+    return {
+        search_analyzer => "analyser_standard",
+        analyzer        => "analyser_standard",
+        type            => $type,
+        fields          => {
+            phrase => {
+                search_analyzer => "analyser_phrase",
+                analyzer        => "analyser_phrase",
+                type            => "text",
+            },
+            raw => {
+                type    => "keyword",
+            }
+        },
+    };
 }
 
 sub reset_elasticsearch_mappings {
-    my ( $reset_fields ) = @_;
-    my $mappings_yaml = C4::Context->config('elasticsearch_index_mappings');
-    $mappings_yaml ||= C4::Context->config('intranetdir') . '/etc/searchengine/elasticsearch/mappings.yaml';
+    my $mappings_yaml = C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
     my $indexes = LoadFile( $mappings_yaml );
 
     while ( my ( $index_name, $fields ) = each %$indexes ) {
@@ -1,5 +1,4 @@
 ---
-# Basic mappings from MARC fields to Elasticsearch fields.
 authorities:
   Corporate-name-see-also-from:
     label: Corporate-name-see-also-from
@@ -2163,7 +2162,7 @@ biblios:
         marc_type: unimarc
         sort: ~
         suggestible: ''
-    type: 'stdno'
+    type: ''
   isbn:
     label: isbn
     mappings:
@@ -2182,7 +2181,7 @@ biblios:
         marc_type: unimarc
         sort: ~
         suggestible: ''
-    type: 'isbn'
+    type: ''
   issn:
     label: issn
     mappings:
@@ -2201,7 +2200,7 @@ biblios:
         marc_type: unimarc
         sort: ~
         suggestible: ''
-    type: 'stdno'
+    type: ''
   issues:
     label: issues
     mappings:
index d2b47fc..9b11c71 100644 (file)
@@ -328,21 +328,10 @@ __END_SRU_PUBLICSERVER__
  <plack_max_requests>50</plack_max_requests>
  <plack_workers>2</plack_workers>
 
- <!-- Elasticsearch Configuration -->
  <elasticsearch>
      <server>localhost:9200</server>
      <index_name>koha___KOHASITE__</index_name>
  </elasticsearch>
- <!-- Uncomment the following line if you want to override the Elasticsearch default index settings -->
- <!-- <elasticsearch_index_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/index_config.yaml</elasticsearch_index_config> -->
- <!-- Uncomment the following line if you want to override the Elasticsearch default field settings -->
- <!-- <elasticsearch_field_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/field_config.yaml</elasticsearch_field_config> -->
- <!-- Uncomment the following line if you want to override the Elasticsearch index default settings.
-      Note that any changes made to the mappings file only take effect if you reset the mappings in
-      by visiting /cgi-bin/koha/admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1&reset_fields=1.
-      Resetting mappings will override any changes made in the Search engine configuration UI.
- -->
- <!-- <elasticsearch_index_mappings>__KOHA_CONF_DIR__/searchengine/elasticsearch/mappings.yaml</elasticsearch_index_mappings> -->
 
  <interlibrary_loans>
      <!-- Path to where Illbackends are located on the system
index 5416065..0dcf486 100644 (file)
@@ -159,21 +159,10 @@ __PAZPAR2_TOGGLE_XML_POST__
  <plack_max_requests>50</plack_max_requests>
  <plack_workers>2</plack_workers>
 
- <!-- Elasticsearch Configuration -->
  <elasticsearch>
      <server>localhost:9200</server>
      <index_name>koha___DB_NAME__</index_name>
  </elasticsearch>
- <!-- Uncomment the following line if you want to override the Elasticsearch default index settings -->
- <!-- <elasticsearch_index_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/index_config.yaml</elasticsearch_index_config> -->
- <!-- Uncomment the following line if you want to override the Elasticsearch default field settings -->
- <!-- <elasticsearch_field_config>__KOHA_CONF_DIR__/searchengine/elasticsearch/field_config.yaml</elasticsearch_field_config> -->
- <!-- Uncomment the following line if you want to override the Elasticsearch index default settings.
-      Note that any changes made to the mappings file only take effect if you reset the mappings in
-      by visiting /cgi-bin/koha/admin/searchengine/elasticsearch/mappings.pl?op=reset&i_know_what_i_am_doing=1&reset_fields=1.
-      Resetting mappings will override any changes made in the Search engine configuration UI.
- -->
- <!-- <elasticsearch_index_mappings>__KOHA_CONF_DIR__/searchengine/elasticsearch/mappings.yaml</elasticsearch_index_mappings> -->
 
  <interlibrary_loans>
      <!-- Path to where Illbackends are located on the system
diff --git a/etc/searchengine/elasticsearch/field_config.yaml b/etc/searchengine/elasticsearch/field_config.yaml
deleted file mode 100644 (file)
index 535a43c..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
----
-# General field configuration
-general:
-  _all:
-    type: string
-    analyzer: analyser_standard
-  properties:
-    record:
-      store: true
-      type: text
-# Search fields
-search:
-  boolean:
-    type: boolean
-    null_value: false
-  integer:
-    type: integer
-    null_value: 0
-  stdno:
-    type: text
-    analyzer: analyser_stdno
-    search_analyzer: analyser_stdno
-    fields:
-      phrase:
-        type: text
-        analyzer: analyser_stdno
-        search_analyzer: analyser_stdno
-      raw:
-        type: keyword
-    copy_to: _all
-  default:
-    type: text
-    analyzer: analyser_standard
-    search_analyzer: analyser_standard
-    fields:
-      phrase:
-        type: text
-        analyzer: analyser_phrase
-        search_analyzer: analyser_phrase
-      raw:
-        type: keyword
-    copy_to: _all
-# Facets
-facet:
-  default:
-    type: keyword
-# Suggestible
-suggestible:
-  default:
-    type: completion
-    analyzer: simple
-    search_analyzer: simple
-# Sort
-sort:
-  default:
-    type: text
-    analyzer: analyser_phrase
-    search_analyzer: analyser_phrase
-    fields:
-      phrase:
-        type: keyword
diff --git a/etc/searchengine/elasticsearch/index_config.yaml b/etc/searchengine/elasticsearch/index_config.yaml
deleted file mode 100644 (file)
index bdfcdf5..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
----
-# Index configuration that defines how different analyzers work.
-index:
-  analysis:
-    analyzer:
-      # Phrase analyzer is used for phrases (phrase match, sorting)
-      analyser_phrase:
-        tokenizer: keyword
-        filter:
-          - icu_folding
-        char_filter:
-          - punctuation
-      analyser_standard:
-        tokenizer: icu_tokenizer
-        filter:
-          - icu_folding
-      analyser_stdno:
-        tokenizer: whitespace
-        filter:
-          - icu_folding
-        char_filter:
-          - punctuation
-    normalizer:
-      normalizer_keyword:
-        type: custom
-        filter:
-          - icu_folding
-    char_filter:
-      # The punctuation filter is used to remove any punctuation chars in fields that don't use icu_tokenizer.
-      punctuation:
-        type: pattern_replace
-        # The pattern contains all ASCII punctuation characters.
-        pattern: '([\x00-\x1F,\x21-\x2F,\x3A-\x40,\x5B-\x60,\x7B-\x89,\x8B,\x8D,\x8F,\x90-\x99,\x9B,\x9D,\xA0-\xBF,\xD7,\xF7])'
-        replacement: ''
index f7970c0..909eda2 100644 (file)
@@ -1459,7 +1459,7 @@ CREATE TABLE `search_field` (
   `id` int(11) NOT NULL AUTO_INCREMENT,
   `name` varchar(255) NOT NULL COMMENT 'the name of the field as it will be stored in the search engine',
   `label` varchar(255) NOT NULL COMMENT 'the human readable name of the field, for display',
-  `type` ENUM('', 'string', 'date', 'number', 'boolean', 'sum', 'isbn', 'stdno') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
+  `type` ENUM('', 'string', 'date', 'number', 'boolean', 'sum') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
   PRIMARY KEY (`id`),
   UNIQUE KEY (`name` (191))
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
index 1cb3c41..74466db 100644 (file)
@@ -164,16 +164,6 @@ a.add, a.delete {
                           [% ELSE %]
                             <option value="sum">Sum</option>
                           [% END %]
-                          [% IF search_field.type == "isbn" %]
-                            <option value="isbn" selected="selected">ISBN</option>
-                          [% ELSE %]
-                            <option value="isbn">ISBN</option>
-                          [% END %]
-                          [% IF search_field.type == "stdno" %]
-                            <option value="stdno" selected="selected">Std. Number</option>
-                          [% ELSE %]
-                            <option value="stdno">Std. Number</option>
-                          [% END %]
                         </select>
                       </td>
                     </tr>
index b91fcba..f7a7f27 100644 (file)
@@ -17,7 +17,7 @@
 
 use Modern::Perl;
 
-use Test::More tests => 3;
+use Test::More tests => 1;
 use Test::Exception;
 
 use t::lib::Mocks;
@@ -84,27 +84,3 @@ subtest '_read_configuration() tests' => sub {
     is( $configuration->{index_name}, 'index', 'Index configuration parsed correctly' );
     is_deeply( $configuration->{nodes}, \@servers , 'Server configuration parsed correctly' );
 };
-
-subtest 'get_elasticsearch_settings() tests' => sub {
-
-    plan tests => 1;
-
-    my $settings;
-
-    # test reading index settings
-    my $es = Koha::SearchEngine::Elasticsearch->new( {index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX} );
-    $settings = $es->get_elasticsearch_settings();
-    is( $settings->{index}{analysis}{analyzer}{analyser_phrase}{tokenizer}, 'keyword', 'Index settings parsed correctly' );
-};
-
-subtest 'get_elasticsearch_mappings() tests' => sub {
-
-    plan tests => 1;
-
-    my $mappings;
-
-    # test reading mappings
-    my $es = Koha::SearchEngine::Elasticsearch->new( {index => $Koha::SearchEngine::Elasticsearch::BIBLIOS_INDEX} );
-    $mappings = $es->get_elasticsearch_mappings();
-    is( $mappings->{data}{_all}{type}, 'string', 'Field mappings parsed correctly' );
-};