From 60b7b62b3f4c34fce79573413fa8f40e64e36c68 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Thu, 27 Apr 2023 18:34:45 +0000 Subject: [PATCH] LP#1997485: (follow-up) flesh out reingest instructions Signed-off-by: Galen Charlton --- .../YYYY.schema.DYM-authority-data-dictionary.sql | 119 ++++++++++++++++++-- .../OPAC/DidYouMean_multi-term.adoc | 7 + 2 files changed, 118 insertions(+), 8 deletions(-) diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql index 06229f8..6fca3b1 100644 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.DYM-authority-data-dictionary.sql @@ -153,10 +153,87 @@ CREATE TRIGGER maintain_symspell_entries_tgr COMMIT; --- Generate symspell sideloader data with authority headings included. +\qecho '' +\qecho 'If the Evergreen database has authority records, a reingest of' +\qecho 'the search suggestion dictionary is recommended.' +\qecho '' +\qecho 'The following should be run at the end of the upgrade before any' +\qecho 'reingest occurs. Because new triggers are installed already,' +\qecho 'updates to indexed strings will cause zero-count dictionary entries' +\qecho 'to be recorded which will require updating every row again (or' +\qecho 'starting from scratch) so best to do this before other batch' +\qecho 'changes. A later reingest that does not significantly change' +\qecho 'indexed strings will /not/ cause table bloat here, and will be' +\qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped' +\qecho 'form is available inside a comment at the end of this upgrade sub-' +\qecho 'script so you do not need to copy this comment from the psql output.' 
+\qecho '' +\qecho '\\a' +\qecho '\\t' +\qecho '' +\qecho '\\o title' +\qecho 'select value from metabib.title_field_entry;' +\qecho 'select h.value' +\qecho ' from authority.simple_heading h' +\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' +\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'title\');' +\qecho '\\o author' +\qecho 'select value from metabib.author_field_entry;' +\qecho 'select h.value' +\qecho ' from authority.simple_heading h' +\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' +\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'author\');' +\qecho '\\o subject' +\qecho 'select value from metabib.subject_field_entry;' +\qecho 'select h.value' +\qecho ' from authority.simple_heading h' +\qecho ' join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)' +\qecho ' join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'subject\');' +\qecho '\\o series' +\qecho 'select value from metabib.series_field_entry;' +\qecho '\\o identifier' +\qecho 'select value from metabib.identifier_field_entry;' +\qecho '\\o keyword' +\qecho 'select value from metabib.keyword_field_entry;' +\qecho '' +\qecho '\\o' +\qecho '\\a' +\qecho '\\t' +\qecho '' +\qecho '// Then, at the command line:' +\qecho '' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql' +\qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql' +\qecho '$ 
~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql' +\qecho '' +\qecho '// And, back in psql' +\qecho '' +\qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;' +\qecho 'TRUNCATE search.symspell_dictionary;' +\qecho '' +\qecho '\\i identifier.sql' +\qecho '\\i author.sql' +\qecho '\\i title.sql' +\qecho '\\i subject.sql' +\qecho '\\i series.sql' +\qecho '\\i keyword.sql' +\qecho '' +\qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;' +\qecho 'REINDEX TABLE search.symspell_dictionary;' +\qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;' +\qecho 'VACUUM ANALYZE search.symspell_dictionary;' +\qecho '' +\qecho 'DROP TABLE search.symspell_dictionary_partial_title;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_author;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_subject;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_series;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;' +\qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;' /* - \a \t @@ -166,27 +243,22 @@ select h.value from authority.simple_heading h join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) join config.metabib_field m on (a.metabib_field=m.id and m.field_class='title'); - \o author select value from metabib.author_field_entry; select h.value from authority.simple_heading h join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) join config.metabib_field m on (a.metabib_field=m.id and m.field_class='author'); - \o subject select value from metabib.subject_field_entry; select h.value from authority.simple_heading h join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag) join config.metabib_field m on (a.metabib_field=m.id and m.field_class='subject'); - \o series select value from metabib.series_field_entry; - \o identifier select value from 
metabib.identifier_field_entry; - \o keyword select value from metabib.keyword_field_entry; @@ -194,5 +266,36 @@ select value from metabib.keyword_field_entry; \a \t -*/ +// Then, at the command line: +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql +$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql + +// And, back in psql + +ALTER TABLE search.symspell_dictionary SET UNLOGGED; +TRUNCATE search.symspell_dictionary; + +\i identifier.sql +\i author.sql +\i title.sql +\i subject.sql +\i series.sql +\i keyword.sql + +CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey; +REINDEX TABLE search.symspell_dictionary; +ALTER TABLE search.symspell_dictionary SET LOGGED; +VACUUM ANALYZE search.symspell_dictionary; + +DROP TABLE search.symspell_dictionary_partial_title; +DROP TABLE search.symspell_dictionary_partial_author; +DROP TABLE search.symspell_dictionary_partial_subject; +DROP TABLE search.symspell_dictionary_partial_series; +DROP TABLE search.symspell_dictionary_partial_identifier; +DROP TABLE search.symspell_dictionary_partial_keyword; +*/ diff --git a/docs/RELEASE_NOTES_NEXT/OPAC/DidYouMean_multi-term.adoc b/docs/RELEASE_NOTES_NEXT/OPAC/DidYouMean_multi-term.adoc index bbf5000..fcda1a5 100644 --- a/docs/RELEASE_NOTES_NEXT/OPAC/DidYouMean_multi-term.adoc +++ b/docs/RELEASE_NOTES_NEXT/OPAC/DidYouMean_multi-term.adoc @@ -36,4 +36,11 @@ whereas unquoted input (or the portion that is not quoted) does not. 
* soundex_weight Weight of the soundex similarity metric; 0 avoids calculation costs * keyboard_distance_weight Weight of the keyboard distance similarity metric; 0 avoids calculation costs +=== Upgrade === +If the database has authority records that are linked to bibliographic +records, a reingest of the search suggestion dictionary is recommended. + +Instructions for performing that reingest are included in the database +update scripts and will be output to the log when those scripts are +run. -- 1.7.2.5