LP#1997485: (follow-up) flesh out reingest intructions
[evergreen-equinox.git] / Open-ILS / src / sql / Pg / upgrade / YYYY.schema.DYM-authority-data-dictionary.sql
1 BEGIN;
2
3 -- Bring authorized headings into the symspell dictionary. Side
4 -- loader should be used for Real Sites.  See below the COMMIT.
5 /*
6 SELECT  search.symspell_build_and_merge_entries(h.value, m.field_class, NULL)
7   FROM  authority.simple_heading h
8         JOIN authority.control_set_auth_field_metabib_field_map_refs a ON (a.authority_field = h.atag)
9         JOIN config.metabib_field m ON (a.metabib_field=m.id);
10 */
11
12 -- ensure that this function is in place; it hitherto had not been
13 -- present in baseline
14
15 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
16     full_input      TEXT,
17     source_class    TEXT,
18     old_input       TEXT DEFAULT NULL,
19     include_phrases BOOL DEFAULT FALSE
20 ) RETURNS SETOF search.symspell_dictionary AS $F$
21 DECLARE
22     new_entry       RECORD;
23     conflict_entry  RECORD;
24 BEGIN
25
26     IF full_input = old_input THEN -- neither NULL, and are the same
27         RETURN;
28     END IF;
29
30     FOR new_entry IN EXECUTE $q$
31         SELECT  count,
32                 prefix_key,
33                 s AS suggestions
34           FROM  (SELECT prefix_key,
35                         ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
36                         SUM($q$ || source_class || $q$_count) count
37                   FROM  search.symspell_build_entries($1, $2, $3, $4)
38                   GROUP BY 1) x
39         $q$ USING full_input, source_class, old_input, include_phrases
40     LOOP
41         EXECUTE $q$
42             SELECT  prefix_key,
43                     $q$ || source_class || $q$_suggestions suggestions,
44                     $q$ || source_class || $q$_count count
45               FROM  search.symspell_dictionary
46               WHERE prefix_key = $1 $q$
47             INTO conflict_entry
48             USING new_entry.prefix_key;
49
50         IF new_entry.count <> 0 THEN -- Real word, and count changed
51             IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
52                 IF conflict_entry.count > 0 THEN -- it's a real word
53                     RETURN QUERY EXECUTE $q$
54                         UPDATE  search.symspell_dictionary
55                            SET  $q$ || source_class || $q$_count = $2
56                           WHERE prefix_key = $1
57                           RETURNING * $q$
58                         USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
59                 ELSE -- it was a prefix key or delete-emptied word before
60                     IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
61                         RETURN QUERY EXECUTE $q$
62                             UPDATE  search.symspell_dictionary
63                                SET  $q$ || source_class || $q$_count = $2
64                               WHERE prefix_key = $1
65                               RETURNING * $q$
66                             USING new_entry.prefix_key, GREATEST(0, new_entry.count);
67                     ELSE -- new suggestion!
68                         RETURN QUERY EXECUTE $q$
69                             UPDATE  search.symspell_dictionary
70                                SET  $q$ || source_class || $q$_count = $2,
71                                     $q$ || source_class || $q$_suggestions = $3
72                               WHERE prefix_key = $1
73                               RETURNING * $q$
74                             USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
75                     END IF;
76                 END IF;
77             ELSE
78                 -- We keep the on-conflict clause just in case...
79                 RETURN QUERY EXECUTE $q$
80                     INSERT INTO search.symspell_dictionary AS d (
81                         $q$ || source_class || $q$_count,
82                         prefix_key,
83                         $q$ || source_class || $q$_suggestions
84                     ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
85                         UPDATE SET  $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
86                                     $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
87                         RETURNING * $q$
88                     USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
89             END IF;
90         ELSE -- key only, or no change
91             IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
92                 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
93                     RETURN QUERY EXECUTE $q$
94                         UPDATE  search.symspell_dictionary
95                            SET  $q$ || source_class || $q$_suggestions = $2
96                           WHERE prefix_key = $1
97                           RETURNING * $q$
98                         USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
99                 END IF;
100             ELSE
101                 RETURN QUERY EXECUTE $q$
102                     INSERT INTO search.symspell_dictionary AS d (
103                         $q$ || source_class || $q$_count,
104                         prefix_key,
105                         $q$ || source_class || $q$_suggestions
106                     ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
107                         UPDATE SET  $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
108                     RETURNING * $q$
109                     USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
110             END IF;
111         END IF;
112     END LOOP;
113 END;
114 $F$ LANGUAGE PLPGSQL;
115
116 CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
117 DECLARE
118     search_class    TEXT;
119     new_value       TEXT := NULL;
120     old_value       TEXT := NULL;
121 BEGIN
122
123     IF TG_TABLE_SCHEMA = 'authority' THEN
124         SELECT  m.field_class INTO search_class
125           FROM  authority.control_set_auth_field_metabib_field_map_refs a
126                 JOIN config.metabib_field m ON (a.metabib_field=m.id)
127           WHERE a.authority_field = NEW.atag;
128
129         IF NOT FOUND THEN
130             RETURN NULL;
131         END IF;
132     ELSE
133         search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
134     END IF;
135
136     IF TG_OP IN ('INSERT', 'UPDATE') THEN
137         new_value := NEW.value;
138     END IF;
139
140     IF TG_OP IN ('DELETE', 'UPDATE') THEN
141         old_value := OLD.value;
142     END IF;
143
144     PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
145
146     RETURN NULL; -- always fired AFTER
147 END;
148 $f$ LANGUAGE PLPGSQL;
149
150 CREATE TRIGGER maintain_symspell_entries_tgr
151     AFTER INSERT OR UPDATE OR DELETE ON authority.simple_heading
152     FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
153
154 COMMIT;
155
156 \qecho ''
157 \qecho 'If the Evergreen database has authority records, a reingest of'
158 \qecho 'the search suggestion dictionary is recommended.'
159 \qecho ''
160 \qecho 'The following should be run at the end of the upgrade before any'
161 \qecho 'reingest occurs.  Because new triggers are installed already,'
162 \qecho 'updates to indexed strings will cause zero-count dictionary entries'
163 \qecho 'to be recorded which will require updating every row again (or'
164 \qecho 'starting from scratch) so best to do this before other batch'
165 \qecho 'changes.  A later reingest that does not significantly change'
166 \qecho 'indexed strings will /not/ cause table bloat here, and will be'
167 \qecho 'as fast as normal.  A copy of the SQL in a ready-to-use, non-escaped'
168 \qecho 'form is available inside a comment at the end of this upgrade sub-'
169 \qecho 'script so you do not need to copy this comment from the psql ouptut.'
170 \qecho ''
171 \qecho '\\a'
172 \qecho '\\t'
173 \qecho ''
174 \qecho '\\o title'
175 \qecho 'select value from metabib.title_field_entry;'
176 \qecho 'select  h.value'
177 \qecho '  from  authority.simple_heading h'
178 \qecho '        join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)'
179 \qecho '        join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'title\');'
180 \qecho '\\o author'
181 \qecho 'select value from metabib.author_field_entry;'
182 \qecho 'select  h.value'
183 \qecho '  from  authority.simple_heading h'
184 \qecho '        join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)'
185 \qecho '        join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'author\');'
186 \qecho '\\o subject'
187 \qecho 'select value from metabib.subject_field_entry;'
188 \qecho 'select  h.value'
189 \qecho '  from  authority.simple_heading h'
190 \qecho '        join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)'
191 \qecho '        join config.metabib_field m on (a.metabib_field=m.id and m.field_class=\'subject\');'
192 \qecho '\\o series'
193 \qecho 'select value from metabib.series_field_entry;'
194 \qecho '\\o identifier'
195 \qecho 'select value from metabib.identifier_field_entry;'
196 \qecho '\\o keyword'
197 \qecho 'select value from metabib.keyword_field_entry;'
198 \qecho ''
199 \qecho '\\o'
200 \qecho '\\a'
201 \qecho '\\t'
202 \qecho ''
203 \qecho '// Then, at the command line:'
204 \qecho ''
205 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
206 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql'
207 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql'
208 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql'
209 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql'
210 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql'
211 \qecho ''
212 \qecho '// And, back in psql'
213 \qecho ''
214 \qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
215 \qecho 'TRUNCATE search.symspell_dictionary;'
216 \qecho ''
217 \qecho '\\i identifier.sql'
218 \qecho '\\i author.sql'
219 \qecho '\\i title.sql'
220 \qecho '\\i subject.sql'
221 \qecho '\\i series.sql'
222 \qecho '\\i keyword.sql'
223 \qecho ''
224 \qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
225 \qecho 'REINDEX TABLE search.symspell_dictionary;'
226 \qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
227 \qecho 'VACUUM ANALYZE search.symspell_dictionary;'
228 \qecho ''
229 \qecho 'DROP TABLE search.symspell_dictionary_partial_title;'
230 \qecho 'DROP TABLE search.symspell_dictionary_partial_author;'
231 \qecho 'DROP TABLE search.symspell_dictionary_partial_subject;'
232 \qecho 'DROP TABLE search.symspell_dictionary_partial_series;'
233 \qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;'
234 \qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;'
235
236 /*
237 \a
238 \t
239
240 \o title
241 select value from metabib.title_field_entry;
242 select  h.value
243   from  authority.simple_heading h
244         join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)
245         join config.metabib_field m on (a.metabib_field=m.id and m.field_class='title');
246 \o author
247 select value from metabib.author_field_entry;
248 select  h.value
249   from  authority.simple_heading h
250         join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)
251         join config.metabib_field m on (a.metabib_field=m.id and m.field_class='author');
252 \o subject
253 select value from metabib.subject_field_entry;
254 select  h.value
255   from  authority.simple_heading h
256         join authority.control_set_auth_field_metabib_field_map_refs a on (a.authority_field = h.atag)
257         join config.metabib_field m on (a.metabib_field=m.id and m.field_class='subject');
258 \o series
259 select value from metabib.series_field_entry;
260 \o identifier
261 select value from metabib.identifier_field_entry;
262 \o keyword
263 select value from metabib.keyword_field_entry;
264
265 \o
266 \a
267 \t
268
269 // Then, at the command line:
270
271 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
272 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
273 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
274 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
275 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
276 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
277
278 // And, back in psql
279
280 ALTER TABLE search.symspell_dictionary SET UNLOGGED;
281 TRUNCATE search.symspell_dictionary;
282
283 \i identifier.sql
284 \i author.sql
285 \i title.sql
286 \i subject.sql
287 \i series.sql
288 \i keyword.sql
289
290 CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
291 REINDEX TABLE search.symspell_dictionary;
292 ALTER TABLE search.symspell_dictionary SET LOGGED;
293 VACUUM ANALYZE search.symspell_dictionary;
294
295 DROP TABLE search.symspell_dictionary_partial_title;
296 DROP TABLE search.symspell_dictionary_partial_author;
297 DROP TABLE search.symspell_dictionary_partial_subject;
298 DROP TABLE search.symspell_dictionary_partial_series;
299 DROP TABLE search.symspell_dictionary_partial_identifier;
300 DROP TABLE search.symspell_dictionary_partial_keyword;
301 */