2 * Copyright (C) 2007-2010 Equinox Software, Inc.
3 * Mike Rylander <miker@esilibrary.com>
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
18 DROP SCHEMA IF EXISTS search CASCADE;
24 CREATE OR REPLACE FUNCTION evergreen.pg_statistics (tab TEXT, col TEXT) RETURNS TABLE(element TEXT, frequency INT) AS $$
26 -- This query will die on PG < 9.2, but the function can be created. We just won't use it where we can't.
30 FROM (SELECT ROW_NUMBER() OVER (),
32 FROM (SELECT UNNEST(most_common_elem_freqs) AS f
38 JOIN (SELECT ROW_NUMBER() OVER (),
40 FROM (SELECT UNNEST(most_common_elems::text::text[]) AS e
45 ) AS elems USING (row_number);
49 CREATE OR REPLACE FUNCTION evergreen.query_int_wrapper (INT[],TEXT) RETURNS BOOL AS $$
51 RETURN $1 @@ $2::query_int;
53 $$ LANGUAGE PLPGSQL STABLE;
55 CREATE TABLE search.relevance_adjustment (
56 id SERIAL PRIMARY KEY,
57 active BOOL NOT NULL DEFAULT TRUE,
58 field INT NOT NULL REFERENCES config.metabib_field (id) DEFERRABLE INITIALLY DEFERRED,
59 bump_type TEXT NOT NULL CHECK (bump_type IN ('word_order','first_word','full_match')),
60 multiplier NUMERIC NOT NULL DEFAULT 1.0
62 CREATE UNIQUE INDEX bump_once_per_field_idx ON search.relevance_adjustment ( field, bump_type );
64 CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes text[], hits bigint[]) RETURNS TABLE(id integer, value text, count bigint)
66 SELECT id, value, count
68 SELECT mfae.field AS id,
70 COUNT(DISTINCT mfae.source),
72 PARTITION BY mfae.field ORDER BY COUNT(DISTINCT mfae.source) DESC
74 FROM metabib.facet_entry mfae
75 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
76 WHERE mfae.source = ANY ($2)
78 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
83 (SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled),
89 CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$
90 SELECT id, value, count FROM (
91 SELECT mfae.field AS id,
93 COUNT(DISTINCT mmrsm.metarecord),
95 PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC
97 FROM metabib.facet_entry mfae
98 JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source)
99 JOIN config.metabib_field cmf ON (cmf.id = mfae.field)
100 WHERE mmrsm.metarecord IN (SELECT * FROM unnest($2))
102 AND cmf.field_class NOT IN (SELECT * FROM unnest($1))
105 WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000));
108 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute ( value INT, attr TEXT ) RETURNS INT AS $f$
111 WHEN 'luri_org' THEN 0 -- "b" attr
112 WHEN 'bib_source' THEN 1 -- "b" attr
114 WHEN 'copy_flags' THEN 0 -- "c" attr
115 WHEN 'owning_lib' THEN 1 -- "c" attr
116 WHEN 'circ_lib' THEN 2 -- "c" attr
117 WHEN 'status' THEN 3 -- "c" attr
118 WHEN 'location' THEN 4 -- "c" attr
119 WHEN 'location_group' THEN 5 -- "c" attr
123 /* copy_flags bit positions, LSB-first:
125 0: asset.copy.opac_visible
128 When adding flags, you must update asset.all_visible_flags()
130 Because bib and copy values are stored separately, we can reuse
131 shifts, saving us some space. We could probably take back a bit
132 too, but I'm not sure its worth squeezing that last one out. We'd
133 be left with just 2 slots for copy attrs, rather than 10.
136 $f$ LANGUAGE SQL IMMUTABLE;
138 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_list ( attr TEXT, value INT[] ) RETURNS INT[] AS $f$
139 SELECT ARRAY_AGG(search.calculate_visibility_attribute(x, $1)) FROM UNNEST($2) AS X;
140 $f$ LANGUAGE SQL IMMUTABLE;
142 CREATE OR REPLACE FUNCTION search.calculate_visibility_attribute_test ( attr TEXT, value INT[], negate BOOL DEFAULT FALSE ) RETURNS TEXT AS $f$
143 SELECT CASE WHEN $3 THEN '!' ELSE '' END || '(' || ARRAY_TO_STRING(search.calculate_visibility_attribute_list($1,$2),'|') || ')';
144 $f$ LANGUAGE SQL IMMUTABLE;
146 CREATE OR REPLACE FUNCTION asset.calculate_copy_visibility_attribute_set ( copy_id BIGINT ) RETURNS INT[] AS $f$
148 copy_row asset.copy%ROWTYPE;
149 lgroup_map asset.copy_location_group_map%ROWTYPE;
150 attr_set INT[] := '{}'::INT[];
152 SELECT * INTO copy_row FROM asset.copy WHERE id = copy_id;
154 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.opac_visible::INT, 'copy_flags');
155 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.circ_lib, 'circ_lib');
156 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.status, 'status');
157 attr_set := attr_set || search.calculate_visibility_attribute(copy_row.location, 'location');
161 search.calculate_visibility_attribute(owning_lib, 'owning_lib')
163 FROM asset.call_number
164 WHERE id = copy_row.call_number;
166 FOR lgroup_map IN SELECT * FROM asset.copy_location_group_map WHERE location = copy_row.location LOOP
167 attr_set := attr_set || search.calculate_visibility_attribute(lgroup_map.lgroup, 'location_group');
172 $f$ LANGUAGE PLPGSQL;
174 CREATE OR REPLACE FUNCTION biblio.calculate_bib_visibility_attribute_set ( bib_id BIGINT, new_source INT DEFAULT NULL, force_source BOOL DEFAULT FALSE ) RETURNS INT[] AS $f$
176 bib_row biblio.record_entry%ROWTYPE;
177 cn_row asset.call_number%ROWTYPE;
178 attr_set INT[] := '{}'::INT[];
180 SELECT * INTO bib_row FROM biblio.record_entry WHERE id = bib_id;
183 IF new_source IS NOT NULL THEN
184 attr_set := attr_set || search.calculate_visibility_attribute(new_source, 'bib_source');
186 ELSIF bib_row.source IS NOT NULL THEN
187 attr_set := attr_set || search.calculate_visibility_attribute(bib_row.source, 'bib_source');
192 FROM asset.call_number
193 WHERE record = bib_id
194 AND label = '##URI##'
197 attr_set := attr_set || search.calculate_visibility_attribute(cn_row.owning_lib, 'luri_org');
202 $f$ LANGUAGE PLPGSQL;
204 CREATE OR REPLACE FUNCTION asset.cache_copy_visibility () RETURNS TRIGGER as $func$
206 ocn asset.call_number%ROWTYPE;
207 ncn asset.call_number%ROWTYPE;
212 SELECT enabled = FALSE INTO dobib FROM config.internal_flag WHERE name = 'ingest.reingest.force_on_same_marc';
214 IF TG_TABLE_NAME = 'peer_bib_copy_map' THEN -- Only needs ON INSERT OR DELETE, so handle separately
215 IF TG_OP = 'INSERT' THEN
216 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
219 asset.calculate_copy_visibility_attribute_set(NEW.target_copy)
223 ELSIF TG_OP = 'DELETE' THEN
224 DELETE FROM asset.copy_vis_attr_cache
225 WHERE record = OLD.peer_record AND target_copy = OLD.target_copy;
231 IF TG_OP = 'INSERT' THEN -- Handles ON INSERT. ON UPDATE is below.
232 IF TG_TABLE_NAME IN ('copy', 'unit') THEN
233 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
234 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
237 asset.calculate_copy_visibility_attribute_set(NEW.id)
239 ELSIF TG_TABLE_NAME = 'record_entry' THEN
240 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
241 ELSIF TG_TABLE_NAME = 'call_number' AND NEW.label = '##URI##' AND dobib THEN -- New located URI
242 UPDATE biblio.record_entry
243 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
244 WHERE id = NEW.record;
251 -- handle items first, since with circulation activity
252 -- their statuses change frequently
253 IF TG_TABLE_NAME IN ('copy', 'unit') THEN -- This handles ON UPDATE OR DELETE. ON INSERT above
255 IF TG_OP = 'DELETE' THEN -- Shouldn't get here, normally
256 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
260 SELECT * INTO ncn FROM asset.call_number cn WHERE id = NEW.call_number;
262 IF OLD.deleted <> NEW.deleted THEN
264 DELETE FROM asset.copy_vis_attr_cache WHERE target_copy = OLD.id;
266 INSERT INTO asset.copy_vis_attr_cache (record, target_copy, vis_attr_vector) VALUES (
269 asset.calculate_copy_visibility_attribute_set(NEW.id)
274 ELSIF OLD.location <> NEW.location OR
275 OLD.status <> NEW.status OR
276 OLD.opac_visible <> NEW.opac_visible OR
277 OLD.circ_lib <> NEW.circ_lib OR
278 OLD.call_number <> NEW.call_number
280 IF OLD.call_number <> NEW.call_number THEN -- Special check since it's more expensive than the next branch
281 SELECT * INTO ocn FROM asset.call_number cn WHERE id = OLD.call_number;
283 IF ncn.record <> ocn.record THEN
284 -- We have to use a record-specific WHERE clause
285 -- to avoid modifying the entries for peer-bib copies.
286 UPDATE asset.copy_vis_attr_cache
287 SET target_copy = NEW.id,
289 WHERE target_copy = OLD.id
290 AND record = ocn.record;
294 -- Any of these could change visibility, but
295 -- we'll save some queries and not try to calculate
296 -- the change directly. We want to update peer-bib
297 -- entries in this case, unlike above.
298 UPDATE asset.copy_vis_attr_cache
299 SET target_copy = NEW.id,
300 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(NEW.id)
301 WHERE target_copy = OLD.id;
305 ELSIF TG_TABLE_NAME = 'call_number' THEN
307 IF TG_OP = 'DELETE' AND OLD.label = '##URI##' AND dobib THEN -- really deleted located URI, if the delete protection rule is disabled...
308 UPDATE biblio.record_entry
309 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
310 WHERE id = OLD.record;
314 IF OLD.label = '##URI##' AND dobib THEN -- Located URI
315 IF OLD.deleted <> NEW.deleted OR OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
316 UPDATE biblio.record_entry
317 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(NEW.record)
318 WHERE id = NEW.record;
320 IF OLD.record <> NEW.record THEN -- maybe on merge?
321 UPDATE biblio.record_entry
322 SET vis_attr_vector = biblio.calculate_bib_visibility_attribute_set(OLD.record)
323 WHERE id = OLD.record;
327 ELSIF OLD.record <> NEW.record OR OLD.owning_lib <> NEW.owning_lib THEN
328 UPDATE asset.copy_vis_attr_cache
329 SET record = NEW.record,
330 vis_attr_vector = asset.calculate_copy_visibility_attribute_set(target_copy)
331 WHERE target_copy IN (SELECT id FROM asset.copy WHERE call_number = NEW.id)
332 AND record = OLD.record;
336 ELSIF TG_TABLE_NAME = 'record_entry' AND OLD.source IS DISTINCT FROM NEW.source THEN -- Only handles ON UPDATE, INSERT above
337 NEW.vis_attr_vector := biblio.calculate_bib_visibility_attribute_set(NEW.id, NEW.source, TRUE);
342 $func$ LANGUAGE PLPGSQL;
344 CREATE TRIGGER z_opac_vis_mat_view_tgr BEFORE INSERT OR UPDATE ON biblio.record_entry FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
345 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR DELETE ON biblio.peer_bib_copy_map FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
346 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE OR DELETE ON asset.call_number FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
347 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
348 CREATE TRIGGER z_opac_vis_mat_view_del_tgr BEFORE DELETE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
349 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON asset.copy FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
350 CREATE TRIGGER z_opac_vis_mat_view_tgr AFTER INSERT OR UPDATE ON serial.unit FOR EACH ROW EXECUTE PROCEDURE asset.cache_copy_visibility();
352 CREATE OR REPLACE FUNCTION asset.all_visible_flags () RETURNS TEXT AS $f$
353 SELECT '(' || STRING_AGG(search.calculate_visibility_attribute(1 << x, 'copy_flags')::TEXT,'&') || ')'
354 FROM GENERATE_SERIES(0,0) AS x; -- increment as new flags are added.
355 $f$ LANGUAGE SQL STABLE;
357 CREATE OR REPLACE FUNCTION asset.visible_orgs (otype TEXT) RETURNS TEXT AS $f$
358 SELECT '(' || STRING_AGG(search.calculate_visibility_attribute(id, $1)::TEXT,'|') || ')'
361 $f$ LANGUAGE SQL STABLE;
363 CREATE OR REPLACE FUNCTION asset.invisible_orgs (otype TEXT) RETURNS TEXT AS $f$
364 SELECT '!(' || STRING_AGG(search.calculate_visibility_attribute(id, $1)::TEXT,'|') || ')'
366 WHERE NOT opac_visible;
367 $f$ LANGUAGE SQL STABLE;
369 -- Bib-oriented defaults for search
370 CREATE OR REPLACE FUNCTION asset.bib_source_default () RETURNS TEXT AS $f$
371 SELECT '(' || STRING_AGG(search.calculate_visibility_attribute(id, 'bib_source')::TEXT,'|') || ')'
372 FROM config.bib_source
374 $f$ LANGUAGE SQL IMMUTABLE;
376 CREATE OR REPLACE FUNCTION asset.luri_org_default () RETURNS TEXT AS $f$
377 SELECT * FROM asset.invisible_orgs('luri_org');
378 $f$ LANGUAGE SQL STABLE;
380 -- Copy-oriented defaults for search
381 CREATE OR REPLACE FUNCTION asset.location_group_default () RETURNS TEXT AS $f$
382 SELECT '!()'::TEXT; -- For now, as there's no way to cause a location group to hide all copies.
384 SELECT '!(' || STRING_AGG(search.calculate_visibility_attribute(id, 'location_group')::TEXT,'|') || ')'
385 FROM asset.copy_location_group
386 WHERE NOT opac_visible;
388 $f$ LANGUAGE SQL IMMUTABLE;
390 CREATE OR REPLACE FUNCTION asset.location_default () RETURNS TEXT AS $f$
391 SELECT '!(' || STRING_AGG(search.calculate_visibility_attribute(id, 'location')::TEXT,'|') || ')'
392 FROM asset.copy_location
393 WHERE NOT opac_visible;
394 $f$ LANGUAGE SQL STABLE;
396 CREATE OR REPLACE FUNCTION asset.status_default () RETURNS TEXT AS $f$
397 SELECT '!(' || STRING_AGG(search.calculate_visibility_attribute(id, 'status')::TEXT,'|') || ')'
398 FROM config.copy_status
399 WHERE NOT opac_visible;
400 $f$ LANGUAGE SQL STABLE;
402 CREATE OR REPLACE FUNCTION asset.owning_lib_default () RETURNS TEXT AS $f$
403 SELECT * FROM asset.invisible_orgs('owning_lib');
404 $f$ LANGUAGE SQL STABLE;
406 CREATE OR REPLACE FUNCTION asset.circ_lib_default () RETURNS TEXT AS $f$
407 SELECT * FROM asset.invisible_orgs('circ_lib');
408 $f$ LANGUAGE SQL STABLE;
410 CREATE OR REPLACE FUNCTION asset.patron_default_visibility_mask () RETURNS TABLE (b_attrs TEXT, c_attrs TEXT) AS $f$
412 copy_flags TEXT; -- "c" attr
414 owning_lib TEXT; -- "c" attr
415 circ_lib TEXT; -- "c" attr
416 status TEXT; -- "c" attr
417 location TEXT; -- "c" attr
418 location_group TEXT; -- "c" attr
420 luri_org TEXT; -- "b" attr
421 bib_sources TEXT; -- "b" attr
423 bib_tests TEXT := '';
425 copy_flags := asset.all_visible_flags(); -- Will always have at least one
427 owning_lib := NULLIF(asset.owning_lib_default(),'!()');
429 circ_lib := NULLIF(asset.circ_lib_default(),'!()');
430 status := NULLIF(asset.status_default(),'!()');
431 location := NULLIF(asset.location_default(),'!()');
432 location_group := NULLIF(asset.location_group_default(),'!()');
434 -- LURIs will be handled at the perl layer directly
435 -- luri_org := NULLIF(asset.luri_org_default(),'!()');
436 bib_sources := NULLIF(asset.bib_source_default(),'()');
439 IF luri_org IS NOT NULL AND bib_sources IS NOT NULL THEN
440 bib_tests := '('||ARRAY_TO_STRING( ARRAY[luri_org,bib_sources], '|')||')&('||luri_org||')&';
441 ELSIF luri_org IS NOT NULL THEN
442 bib_tests := luri_org || '&';
443 ELSIF bib_sources IS NOT NULL THEN
444 bib_tests := bib_sources || '|';
447 RETURN QUERY SELECT bib_tests,
448 '('||ARRAY_TO_STRING(
449 ARRAY[copy_flags,owning_lib,circ_lib,status,location,location_group]::TEXT[],
453 $f$ LANGUAGE PLPGSQL STABLE ROWS 1;
455 CREATE OR REPLACE FUNCTION metabib.suggest_browse_entries(raw_query_text text, search_class text, headline_opts text, visibility_org integer, query_limit integer, normalization integer)
456 RETURNS TABLE(value text, field integer, buoyant_and_class_match boolean, field_match boolean, field_weight integer, rank real, buoyant boolean, match text)
459 prepared_query_texts TEXT[];
462 opac_visibility_join TEXT;
463 search_class_join TEXT;
467 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
469 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
470 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
472 visibility_org := NULLIF(visibility_org,-1);
473 IF visibility_org IS NOT NULL THEN
474 PERFORM FROM actor.org_unit WHERE id = visibility_org AND parent_ou IS NULL;
476 opac_visibility_join := '';
478 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
480 b_tests := search.calculate_visibility_attribute_test(
482 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(visibility_org))
485 b_tests := search.calculate_visibility_attribute_test(
487 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(visibility_org))
490 opac_visibility_join := '
491 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = x.source)
492 LEFT JOIN biblio.record_entry b ON (b.id = x.source)
493 JOIN vm ON (acvac.vis_attr_vector @@
494 (vm.c_attrs || $$&$$ ||
495 search.calculate_visibility_attribute_test(
497 (SELECT ARRAY_AGG(id) FROM actor.org_unit_descendants($4))
500 ) OR (b.vis_attr_vector @@ $$' || b_tests || '$$::query_int)
504 opac_visibility_join := '';
507 -- The following determines whether we only provide suggestsons matching
508 -- the user's selected search_class, or whether we show other suggestions
509 -- too. The reason for MIN() is that for search_classes like
510 -- 'title|proper|uniform' you would otherwise get multiple rows. The
511 -- implication is that if title as a class doesn't have restrict,
512 -- nor does the proper field, but the uniform field does, you're going
513 -- to get 'false' for your overall evaluation of 'should we restrict?'
514 -- To invert that, change from MIN() to MAX().
518 MIN(cmc.restrict::INT) AS restrict_class,
519 MIN(cmf.restrict::INT) AS restrict_field
520 FROM metabib.search_class_to_registered_components(search_class)
521 AS _registered (field_class TEXT, field INT)
523 config.metabib_class cmc ON (cmc.name = _registered.field_class)
525 config.metabib_field cmf ON (cmf.id = _registered.field);
527 -- evaluate 'should we restrict?'
528 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
529 search_class_join := '
531 metabib.search_class_to_registered_components($2)
532 AS _registered (field_class TEXT, field INT) ON (
533 (_registered.field IS NULL AND
534 _registered.field_class = cmf.field_class) OR
535 (_registered.field = cmf.id)
539 search_class_join := '
541 metabib.search_class_to_registered_components($2)
542 AS _registered (field_class TEXT, field INT) ON (
543 _registered.field_class = cmc.name
548 RETURN QUERY EXECUTE '
549 WITH vm AS ( SELECT * FROM asset.patron_default_visibility_mask() ),
550 mbe AS (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000)
559 TS_HEADLINE(value, $7, $3)
560 FROM (SELECT DISTINCT
563 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
564 _registered.field = cmf.id AS restrict,
566 TS_RANK_CD(mbe.index_vector, $1, $6),
569 FROM metabib.browse_entry_def_map mbedm
570 JOIN mbe ON (mbe.id = mbedm.entry)
571 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
572 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
573 ' || search_class_join || '
574 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
576 ' || opac_visibility_join || '
577 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
579 ' -- sic, repeat the order by clause in the outer select too
581 query, search_class, headline_opts,
582 visibility_org, query_limit, normalization, plain_query
586 -- buoyant AND chosen class = match class
587 -- chosen field = match field
594 $f$ LANGUAGE plpgsql ROWS 10;
596 CREATE OR REPLACE FUNCTION metabib.staged_browse(query text, fields integer[], context_org integer, context_locations integer[], staff boolean, browse_superpage_size integer, count_up_from_zero boolean, result_limit integer, next_pivot_pos integer)
597 RETURNS SETOF metabib.flat_browse_entry_appearance
606 result_row metabib.flat_browse_entry_appearance%ROWTYPE;
607 results_skipped INT := 0;
608 row_counter INT := 0;
613 all_records BIGINT[];
614 all_brecords BIGINT[];
615 all_arecords BIGINT[];
616 superpage_of_records BIGINT[];
621 unauthorized_entry RECORD;
623 IF count_up_from_zero THEN
630 SELECT x.c_attrs, x.b_attrs INTO c_tests, b_tests FROM asset.patron_default_visibility_mask() x;
633 -- b_tests supplies its own query_int operator, c_tests does not
634 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
636 SELECT ARRAY_AGG(id) INTO c_orgs FROM actor.org_unit_descendants(context_org);
638 c_tests := c_tests || search.calculate_visibility_attribute_test('circ_lib',c_orgs)
639 || '&' || search.calculate_visibility_attribute_test('owning_lib',c_orgs);
641 PERFORM 1 FROM config.internal_flag WHERE enabled AND name = 'opac.located_uri.act_as_copy';
643 b_tests := b_tests || search.calculate_visibility_attribute_test(
645 (SELECT ARRAY_AGG(id) FROM actor.org_unit_full_path(context_org) x)
648 b_tests := b_tests || search.calculate_visibility_attribute_test(
650 (SELECT ARRAY_AGG(id) FROM actor.org_unit_ancestors(context_org) x)
654 IF context_locations THEN
655 IF c_tests <> '' THEN c_tests := c_tests || '&'; END IF;
656 c_tests := c_tests || search.calculate_visibility_attribute_test('location',context_locations);
659 OPEN curs NO SCROLL FOR EXECUTE query;
664 IF result_row.pivot_point IS NOT NULL THEN
665 RETURN NEXT result_row;
671 SELECT INTO unauthorized_entry *
672 FROM metabib.browse_entry_simple_heading_map mbeshm
673 INNER JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
674 INNER JOIN authority.control_set_authority_field acsaf ON ( acsaf.id = ash.atag )
675 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
676 WHERE mbeshm.entry = rec.id
677 AND ahf.heading_purpose = 'variant';
679 -- Gather aggregate data based on the MBE row we're looking at now, authority axis
680 IF (unauthorized_entry.record IS NOT NULL) THEN
681 --unauthorized term belongs to an auth linked to a bib?
682 SELECT INTO all_arecords, result_row.sees, afields
683 ARRAY_AGG(DISTINCT abl.bib),
684 STRING_AGG(DISTINCT abl.authority::TEXT, $$,$$),
685 ARRAY_AGG(DISTINCT map.metabib_field)
686 FROM authority.bib_linking abl
687 INNER JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
688 map.authority_field = unauthorized_entry.atag
689 AND map.metabib_field = ANY(fields)
691 WHERE abl.authority = unauthorized_entry.record;
694 SELECT INTO all_arecords, result_row.sees, afields
695 ARRAY_AGG(DISTINCT abl.bib), -- bibs to check for visibility
696 STRING_AGG(DISTINCT aal.source::TEXT, $$,$$), -- authority record ids
697 ARRAY_AGG(DISTINCT map.metabib_field) -- authority-tag-linked CMF rows
699 FROM metabib.browse_entry_simple_heading_map mbeshm
700 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
701 JOIN authority.authority_linking aal ON ( ash.record = aal.source )
702 JOIN authority.bib_linking abl ON ( aal.target = abl.authority )
703 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
704 ash.atag = map.authority_field
705 AND map.metabib_field = ANY(fields)
707 JOIN authority.control_set_authority_field acsaf ON (
708 map.authority_field = acsaf.id
710 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
711 WHERE mbeshm.entry = rec.id
712 AND ahf.heading_purpose = 'variant';
716 -- Gather aggregate data based on the MBE row we're looking at now, bib axis
717 SELECT INTO all_brecords, result_row.authorities, bfields
718 ARRAY_AGG(DISTINCT source),
719 STRING_AGG(DISTINCT authority::TEXT, $$,$$),
720 ARRAY_AGG(DISTINCT def)
721 FROM metabib.browse_entry_def_map
723 AND def = ANY(fields);
725 SELECT INTO result_row.fields STRING_AGG(DISTINCT x::TEXT, $$,$$) FROM UNNEST(afields || bfields) x;
727 result_row.sources := 0;
728 result_row.asources := 0;
730 -- Bib-linked vis checking
731 IF ARRAY_UPPER(all_brecords,1) IS NOT NULL THEN
733 SELECT INTO result_row.sources COUNT(DISTINCT b.id)
734 FROM biblio.record_entry b
735 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
736 WHERE b.id = ANY(all_brecords[1:browse_superpage_size])
738 acvac.vis_attr_vector @@ c_tests::query_int
739 OR b.vis_attr_vector @@ b_tests::query_int
742 result_row.accurate := TRUE;
746 -- Authority-linked vis checking
747 IF ARRAY_UPPER(all_arecords,1) IS NOT NULL THEN
749 SELECT INTO result_row.asources COUNT(DISTINCT b.id)
750 FROM biblio.record_entry b
751 LEFT JOIN asset.copy_vis_attr_cache acvac ON (acvac.record = b.id)
752 WHERE b.id = ANY(all_arecords[1:browse_superpage_size])
754 acvac.vis_attr_vector @@ c_tests::query_int
755 OR b.vis_attr_vector @@ b_tests::query_int
758 result_row.aaccurate := TRUE;
762 IF result_row.sources > 0 OR result_row.asources > 0 THEN
764 -- The function that calls this function needs row_number in order
765 -- to correctly order results from two different runs of this
767 result_row.row_number := row_number;
769 -- Now, if row_counter is still less than limit, return a row. If
770 -- not, but it is less than next_pivot_pos, continue on without
771 -- returning actual result rows until we find
772 -- that next pivot, and return it.
774 IF row_counter < result_limit THEN
775 result_row.browse_entry := rec.id;
776 result_row.value := rec.value;
778 RETURN NEXT result_row;
780 result_row.browse_entry := NULL;
781 result_row.authorities := NULL;
782 result_row.fields := NULL;
783 result_row.value := NULL;
784 result_row.sources := NULL;
785 result_row.sees := NULL;
786 result_row.accurate := NULL;
787 result_row.aaccurate := NULL;
788 result_row.pivot_point := rec.id;
790 IF row_counter >= next_pivot_pos THEN
791 RETURN NEXT result_row;
796 IF count_up_from_zero THEN
797 row_number := row_number + 1;
799 row_number := row_number - 1;
802 -- row_counter is different from row_number.
803 -- It simply counts up from zero so that we know when
804 -- we've reached our limit.
805 row_counter := row_counter + 1;
809 $f$ LANGUAGE plpgsql ROWS 10;
811 CREATE OR REPLACE FUNCTION metabib.browse(search_field integer[], browse_term text, context_org integer DEFAULT NULL::integer, context_loc_group integer DEFAULT NULL::integer, staff boolean DEFAULT false, pivot_id bigint DEFAULT NULL::bigint, result_limit integer DEFAULT 10)
812 RETURNS SETOF metabib.flat_browse_entry_appearance
818 pivot_sort_value TEXT;
819 pivot_sort_fallback TEXT;
820 context_locations INT[];
821 browse_superpage_size INT;
822 results_skipped INT := 0;
826 forward_to_pivot INT;
828 -- First, find the pivot if we were given a browse term but not a pivot.
829 IF pivot_id IS NULL THEN
830 pivot_id := metabib.browse_pivot(search_field, browse_term);
833 SELECT INTO pivot_sort_value, pivot_sort_fallback
834 sort_value, value FROM metabib.browse_entry WHERE id = pivot_id;
836 -- Bail if we couldn't find a pivot.
837 IF pivot_sort_value IS NULL THEN
841 -- Transform the context_loc_group argument (if any) (logc at the
842 -- TPAC layer) into a form we'll be able to use.
843 IF context_loc_group IS NOT NULL THEN
844 SELECT INTO context_locations ARRAY_AGG(location)
845 FROM asset.copy_location_group_map
846 WHERE lgroup = context_loc_group;
849 -- Get the configured size of browse superpages.
850 SELECT INTO browse_superpage_size COALESCE(value::INT,100) -- NULL ok
851 FROM config.global_flag
852 WHERE enabled AND name = 'opac.browse.holdings_visibility_test_limit';
854 -- First we're going to search backward from the pivot, then we're going
855 -- to search forward. In each direction, we need two limits. At the
856 -- lesser of the two limits, we delineate the edge of the result set
857 -- we're going to return. At the greater of the two limits, we find the
858 -- pivot value that would represent an offset from the current pivot
859 -- at a distance of one "page" in either direction, where a "page" is a
860 -- result set of the size specified in the "result_limit" argument.
862 -- The two limits in each direction make four derived values in total,
863 -- and we calculate them now.
864 back_limit := CEIL(result_limit::FLOAT / 2);
865 back_to_pivot := result_limit;
866 forward_limit := result_limit / 2;
867 forward_to_pivot := result_limit - 1;
869 -- This is the meat of the SQL query that finds browse entries. We'll
870 -- pass this to a function which uses it with a cursor, so that individual
871 -- rows may be fetched in a loop until some condition is satisfied, without
872 -- waiting for a result set of fixed size to be collected all at once.
877 FROM metabib.browse_entry mbe
879 EXISTS ( -- are there any bibs using this mbe via the requested fields?
881 FROM metabib.browse_entry_def_map mbedm
882 WHERE mbedm.entry = mbe.id AND mbedm.def = ANY(' || quote_literal(search_field) || ')
883 ) OR EXISTS ( -- are there any authorities using this mbe via the requested fields?
885 FROM metabib.browse_entry_simple_heading_map mbeshm
886 JOIN authority.simple_heading ash ON ( mbeshm.simple_heading = ash.id )
887 JOIN authority.control_set_auth_field_metabib_field_map_refs map ON (
888 ash.atag = map.authority_field
889 AND map.metabib_field = ANY(' || quote_literal(search_field) || ')
891 JOIN authority.control_set_authority_field acsaf ON (
892 map.authority_field = acsaf.id
894 JOIN authority.heading_field ahf ON (ahf.id = acsaf.heading_field)
895 WHERE mbeshm.entry = mbe.id
896 AND ahf.heading_purpose IN (' || $$'variant'$$ || ')
897 -- and authority that variant is coming from is linked to a bib
900 FROM metabib.browse_entry_def_map mbedm2
901 WHERE mbedm2.authority = ash.record AND mbedm2.def = ANY(' || quote_literal(search_field) || ')
907 -- This is the variant of the query for browsing backward.
908 back_query := core_query ||
909 ' mbe.sort_value <= ' || quote_literal(pivot_sort_value) ||
910 ' ORDER BY mbe.sort_value DESC, mbe.value DESC LIMIT 1000';
912 -- This variant browses forward.
913 forward_query := core_query ||
914 ' mbe.sort_value > ' || quote_literal(pivot_sort_value) ||
915 ' ORDER BY mbe.sort_value, mbe.value LIMIT 1000';
917 -- We now call the function which applies a cursor to the provided
918 -- queries, stopping at the appropriate limits and also giving us
919 -- the next page's pivot.
921 SELECT * FROM metabib.staged_browse(
922 back_query, search_field, context_org, context_locations,
923 staff, browse_superpage_size, TRUE, back_limit, back_to_pivot
925 SELECT * FROM metabib.staged_browse(
926 forward_query, search_field, context_org, context_locations,
927 staff, browse_superpage_size, FALSE, forward_limit, forward_to_pivot
928 ) ORDER BY row_number DESC;
931 $f$ LANGUAGE plpgsql ROWS 10;
933 CREATE OR REPLACE FUNCTION metabib.browse(
936 context_org INT DEFAULT NULL,
937 context_loc_group INT DEFAULT NULL,
938 staff BOOL DEFAULT FALSE,
939 pivot_id BIGINT DEFAULT NULL,
940 result_limit INT DEFAULT 10
941 ) RETURNS SETOF metabib.flat_browse_entry_appearance AS $p$
943 RETURN QUERY SELECT * FROM metabib.browse(
944 (SELECT COALESCE(ARRAY_AGG(id), ARRAY[]::INT[])
945 FROM config.metabib_field WHERE field_class = search_class),
954 $p$ LANGUAGE PLPGSQL ROWS 10;
956 CREATE OR REPLACE VIEW search.best_tsconfig AS
958 COALESCE(f.ts_config, c.ts_config, 'simple') AS ts_config
959 FROM config.metabib_field m
960 LEFT JOIN config.metabib_class_ts_map c ON (c.field_class = m.field_class AND c.index_weight = 'C')
961 LEFT JOIN config.metabib_field_ts_map f ON (f.metabib_field = m.id AND f.index_weight = 'C');
963 CREATE TYPE search.highlight_result AS ( id BIGINT, source BIGINT, field INT, value TEXT, highlight TEXT );
965 CREATE OR REPLACE FUNCTION search.highlight_display_fields_impl(
968 field_list INT[] DEFAULT '{}'::INT[],
969 css_class TEXT DEFAULT 'oils_SH',
970 hl_all BOOL DEFAULT TRUE,
971 minwords INT DEFAULT 5,
972 maxwords INT DEFAULT 25,
973 shortwords INT DEFAULT 0,
974 maxfrags INT DEFAULT 0,
975 delimiter TEXT DEFAULT ' ... '
976 ) RETURNS SETOF search.highlight_result AS $f$
979 v_css_class TEXT := css_class;
980 v_delimiter TEXT := delimiter;
981 v_field_list INT[] := field_list;
984 IF v_delimiter LIKE $$%'%$$ OR v_delimiter LIKE '%"%' THEN --"
985 v_delimiter := ' ... ';
989 opts := opts || 'MinWords=' || minwords;
990 opts := opts || ', MaxWords=' || maxwords;
991 opts := opts || ', ShortWords=' || shortwords;
992 opts := opts || ', MaxFragments=' || maxfrags;
993 opts := opts || ', FragmentDelimiter="' || delimiter || '"';
995 opts := opts || 'HighlightAll=TRUE';
998 IF v_css_class LIKE $$%'%$$ OR v_css_class LIKE '%"%' THEN -- "
999 v_css_class := 'oils_SH';
1002 opts := opts || $$, StopSel=</mark>, StartSel="<mark class='$$ || v_css_class; -- "
1004 IF v_field_list = '{}'::INT[] THEN
1005 SELECT ARRAY_AGG(id) INTO v_field_list FROM config.metabib_field WHERE display_field;
1012 evergreen.escape_for_html(de.value) AS value,
1014 ts_config::REGCONFIG,
1015 evergreen.escape_for_html(de.value),
1016 $$ || quote_literal(tsq) || $$,
1017 $1 || ' ' || mf.field_class || ' ' || mf.name || $xx$'>"$xx$ -- "'
1019 FROM metabib.display_entry de
1020 JOIN config.metabib_field mf ON (mf.id = de.field)
1021 JOIN search.best_tsconfig t ON (t.id = de.field)
1022 WHERE de.source = $2
1023 AND field = ANY ($3)
1026 RETURN QUERY EXECUTE hl_query USING opts, rid, v_field_list;
1028 $f$ LANGUAGE PLPGSQL;
1030 CREATE OR REPLACE FUNCTION evergreen.escape_for_html (TEXT) RETURNS TEXT AS $$
1031 SELECT regexp_replace(
1047 $$ LANGUAGE SQL IMMUTABLE LEAKPROOF STRICT COST 10;
1049 CREATE OR REPLACE FUNCTION search.highlight_display_fields(
1051 tsq_map TEXT, -- { '(a | b) & c' => '1,2,3,4', ...}
1052 css_class TEXT DEFAULT 'oils_SH',
1053 hl_all BOOL DEFAULT TRUE,
1054 minwords INT DEFAULT 5,
1055 maxwords INT DEFAULT 25,
1056 shortwords INT DEFAULT 0,
1057 maxfrags INT DEFAULT 0,
1058 delimiter TEXT DEFAULT ' ... '
1059 ) RETURNS SETOF search.highlight_result AS $f$
1067 IF (tsq_map ILIKE 'hstore%') THEN
1068 EXECUTE 'SELECT ' || tsq_map INTO tsq_hstore;
1070 tsq_hstore := tsq_map::HSTORE;
1073 FOR tsq, fields IN SELECT key, value FROM each(tsq_hstore::HSTORE) LOOP
1074 SELECT ARRAY_AGG(unnest::INT) INTO afields
1075 FROM unnest(regexp_split_to_array(fields,','));
1076 seen := seen || afields;
1079 SELECT * FROM search.highlight_display_fields_impl(
1080 rid, tsq, afields, css_class, hl_all,minwords,
1081 maxwords, shortwords, maxfrags, delimiter
1089 evergreen.escape_for_html(value) AS value,
1090 evergreen.escape_for_html(value) AS highlight
1091 FROM metabib.display_entry
1093 AND NOT (field = ANY (seen));
1095 $f$ LANGUAGE PLPGSQL ROWS 10;
1097 -- SymSpell implementation follows
1099 -- We don't pass this function arrays with nulls, so we save 5% not testing for that
1100 CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
1102 ) RETURNS TEXT[] AS $F$
1103 SELECT NULLIF(ARRAY(
1104 SELECT * FROM UNNEST($1) x
1106 SELECT * FROM UNNEST($2) y
1110 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1111 use String::KeyboardDistance qw(:all);
1112 return qwerty_keyboard_distance(@_);
1113 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1115 CREATE OR REPLACE FUNCTION evergreen.qwerty_keyboard_distance_match ( a TEXT, b TEXT ) RETURNS NUMERIC AS $F$
1116 use String::KeyboardDistance qw(:all);
1117 return qwerty_keyboard_distance_match(@_);
1118 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1120 CREATE OR REPLACE FUNCTION evergreen.levenshtein_damerau_edistance ( a TEXT, b TEXT, INT ) RETURNS NUMERIC AS $F$
1121 use Text::Levenshtein::Damerau::XS qw/xs_edistance/;
1122 return xs_edistance(@_);
1123 $F$ LANGUAGE PLPERLU STRICT IMMUTABLE;
1125 CREATE TABLE search.symspell_dictionary (
1126 keyword_count INT NOT NULL DEFAULT 0,
1127 title_count INT NOT NULL DEFAULT 0,
1128 author_count INT NOT NULL DEFAULT 0,
1129 subject_count INT NOT NULL DEFAULT 0,
1130 series_count INT NOT NULL DEFAULT 0,
1131 identifier_count INT NOT NULL DEFAULT 0,
1133 prefix_key TEXT PRIMARY KEY,
1135 keyword_suggestions TEXT[],
1136 title_suggestions TEXT[],
1137 author_suggestions TEXT[],
1138 subject_suggestions TEXT[],
1139 series_suggestions TEXT[],
1140 identifier_suggestions TEXT[]
1141 ) WITH (fillfactor = 80);
1143 -- INSERT-only table that catches updates to be reconciled
1144 CREATE UNLOGGED TABLE search.symspell_dictionary_updates (
1145 transaction_id BIGINT,
1146 keyword_count INT NOT NULL DEFAULT 0,
1147 title_count INT NOT NULL DEFAULT 0,
1148 author_count INT NOT NULL DEFAULT 0,
1149 subject_count INT NOT NULL DEFAULT 0,
1150 series_count INT NOT NULL DEFAULT 0,
1151 identifier_count INT NOT NULL DEFAULT 0,
1153 prefix_key TEXT NOT NULL,
1155 keyword_suggestions TEXT[],
1156 title_suggestions TEXT[],
1157 author_suggestions TEXT[],
1158 subject_suggestions TEXT[],
1159 series_suggestions TEXT[],
1160 identifier_suggestions TEXT[]
1162 CREATE INDEX symspell_dictionary_updates_tid_idx ON search.symspell_dictionary_updates (transaction_id);
1164 CREATE OR REPLACE FUNCTION search.symspell_dictionary_reify () RETURNS SETOF search.symspell_dictionary AS $f$
1166 DELETE FROM search.symspell_dictionary_updates WHERE transaction_id = txid_current() RETURNING *
1167 ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
1168 SELECT SUM(keyword_count) AS keyword_count,
1169 SUM(title_count) AS title_count,
1170 SUM(author_count) AS author_count,
1171 SUM(subject_count) AS subject_count,
1172 SUM(series_count) AS series_count,
1173 SUM(identifier_count) AS identifier_count,
1177 ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
1178 ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
1179 ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
1180 ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
1181 ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
1182 ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
1186 INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
1187 ON CONFLICT (prefix_key) DO UPDATE SET
1188 keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
1189 keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
1191 title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
1192 title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
1194 author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
1195 author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
1197 subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
1198 subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
1200 series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
1201 series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
1203 identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
1204 identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
1207 EXCLUDED.keyword_count <> 0 OR
1208 EXCLUDED.title_count <> 0 OR
1209 EXCLUDED.author_count <> 0 OR
1210 EXCLUDED.subject_count <> 0 OR
1211 EXCLUDED.series_count <> 0 OR
1212 EXCLUDED.identifier_count <> 0 OR
1213 NOT (EXCLUDED.keyword_suggestions <@ d.keyword_suggestions) OR
1214 NOT (EXCLUDED.title_suggestions <@ d.title_suggestions) OR
1215 NOT (EXCLUDED.author_suggestions <@ d.author_suggestions) OR
1216 NOT (EXCLUDED.subject_suggestions <@ d.subject_suggestions) OR
1217 NOT (EXCLUDED.series_suggestions <@ d.series_suggestions) OR
1218 NOT (EXCLUDED.identifier_suggestions <@ d.identifier_suggestions)
1223 CREATE OR REPLACE FUNCTION search.disable_symspell_reification () RETURNS VOID AS $f$
1224 INSERT INTO config.internal_flag (name,enabled)
1225 VALUES ('ingest.disable_symspell_reification',TRUE)
1226 ON CONFLICT (name) DO UPDATE SET enabled = TRUE;
1229 CREATE OR REPLACE FUNCTION search.enable_symspell_reification () RETURNS VOID AS $f$
1230 UPDATE config.internal_flag SET enabled = FALSE WHERE name = 'ingest.disable_symspell_reification';
1233 CREATE OR REPLACE FUNCTION search.symspell_dictionary_full_reify () RETURNS SETOF search.symspell_dictionary AS $f$
1235 DELETE FROM search.symspell_dictionary_updates RETURNING *
1236 ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
1237 SELECT SUM(keyword_count) AS keyword_count,
1238 SUM(title_count) AS title_count,
1239 SUM(author_count) AS author_count,
1240 SUM(subject_count) AS subject_count,
1241 SUM(series_count) AS series_count,
1242 SUM(identifier_count) AS identifier_count,
1246 ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
1247 ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
1248 ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
1249 ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
1250 ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
1251 ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
1255 INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
1256 ON CONFLICT (prefix_key) DO UPDATE SET
1257 keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
1258 keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
1260 title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
1261 title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
1263 author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
1264 author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
1266 subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
1267 subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
1269 series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
1270 series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
1272 identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
1273 identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
1277 CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
1278 RETURNS SETOF TEXT AS $F$
1280 FROM (SELECT UNNEST(x), ROW_NUMBER() OVER ()
1281 FROM regexp_matches($1, '(?:^|\s+)((?:-|\+)?[[:alnum:]]+''*[[:alnum:]]*)', 'g') x
1283 WHERE UNNEST IS NOT NULL
1285 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1287 CREATE OR REPLACE FUNCTION search.distribute_phrase_sign (input TEXT) RETURNS TEXT AS $f$
1294 IF output ~ '^(?:-|\+)' THEN
1295 phrase_sign := SUBSTRING(input FROM 1 FOR 1);
1296 output := SUBSTRING(output FROM 2);
1299 IF output LIKE '"%"' THEN
1300 IF phrase_sign IS NULL THEN
1303 output := BTRIM(output,'"');
1306 IF phrase_sign IS NOT NULL THEN
1307 RETURN REGEXP_REPLACE(output,'(^|\s+)(?=[[:alnum:]])','\1'||phrase_sign,'g');
1312 $f$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1314 CREATE OR REPLACE FUNCTION search.query_parse_phrases ( phrase TEXT )
1315 RETURNS SETOF TEXT AS $F$
1316 SELECT search.distribute_phrase_sign(UNNEST)
1317 FROM (SELECT UNNEST(x), ROW_NUMBER() OVER ()
1318 FROM regexp_matches($1, '(?:^|\s+)(?:((?:-|\+)?"[^"]+")|((?:-|\+)?[[:alnum:]]+''*[[:alnum:]]*))', 'g') x
1320 WHERE UNNEST IS NOT NULL
1322 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1324 CREATE TYPE search.query_parse_position AS (
1327 phrase_in_input_pos INT,
1328 word_in_phrase_pos INT,
1333 CREATE OR REPLACE FUNCTION search.query_parse_positions ( raw_input TEXT )
1334 RETURNS SETOF search.query_parse_position AS $F$
1338 phrase_pos INT := 0;
1344 FOR curr_phrase IN SELECT x FROM search.query_parse_phrases(raw_input) x LOOP
1346 FOR curr_word IN SELECT x FROM search.symspell_parse_words(curr_phrase) x LOOP
1349 IF curr_word ~ '^(?:-|\+)' THEN
1351 IF curr_word LIKE '-%' THEN
1354 curr_word := SUBSTRING(curr_word FROM 2);
1356 RETURN QUERY SELECT curr_word, pos, phrase_pos, word_pos, neg, ex;
1357 word_pos := word_pos + 1;
1360 phrase_pos := phrase_pos + 1;
1364 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1366 -- This version does not preserve input word order!
1367 CREATE OR REPLACE FUNCTION search.symspell_parse_words_distinct ( phrase TEXT )
1368 RETURNS SETOF TEXT AS $F$
1369 SELECT DISTINCT UNNEST(x) FROM regexp_matches($1, '([[:alnum:]]+''*[[:alnum:]]*)', 'g') x;
1370 $F$ LANGUAGE SQL STRICT IMMUTABLE;
1372 CREATE OR REPLACE FUNCTION search.symspell_transfer_casing ( withCase TEXT, withoutCase TEXT )
1379 woChars := regexp_split_to_array(withoutCase,'');
1380 FOR curr IN SELECT x FROM regexp_split_to_table(withCase, '') x LOOP
1381 IF curr = evergreen.uppercase(curr) THEN
1382 woChars[ind] := evergreen.uppercase(woChars[ind]);
1386 RETURN ARRAY_TO_STRING(woChars,'');
1388 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1390 CREATE OR REPLACE FUNCTION search.symspell_generate_edits (
1394 ) RETURNS TEXT[] AS $F$
1397 list TEXT[] := '{}';
1398 sublist TEXT[] := '{}';
1400 FOR I IN 1 .. CHARACTER_LENGTH(raw_word) LOOP
1401 item := SUBSTRING(raw_word FROM 1 FOR I - 1) || SUBSTRING(raw_word FROM I + 1);
1402 IF NOT list @> ARRAY[item] THEN
1403 list := item || list;
1404 IF dist < maxED AND CHARACTER_LENGTH(raw_word) > dist + 1 THEN
1405 sublist := search.symspell_generate_edits(item, dist + 1, maxED) || sublist;
1411 RETURN evergreen.text_array_merge_unique(list, sublist);
1413 RETURN list || sublist;
1416 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
1418 -- DROP TYPE search.symspell_lookup_output CASCADE;
1419 CREATE TYPE search.symspell_lookup_output AS (
1421 suggestion_count INT,
1423 pg_trgm_sim NUMERIC,
1424 qwerty_kb_match NUMERIC,
1425 soundex_sim NUMERIC,
1429 prefix_key_count INT,
1434 CREATE OR REPLACE FUNCTION search.symspell_generate_combined_suggestions(
1435 word_data search.symspell_lookup_output[],
1436 pos_data search.query_parse_position[],
1437 skip_correct BOOL DEFAULT TRUE,
1438 max_words INT DEFAULT 0
1439 ) RETURNS TABLE (suggestion TEXT, test TEXT) AS $f$
1440 my $word_data = shift;
1441 my $pos_data = shift;
1442 my $skip_correct = shift;
1443 my $max_per_word = shift;
1444 return undef unless (@$word_data and @$pos_data);
1446 my $last_word_pos = $$word_data[-1]{word_pos};
1447 my $pos_to_word_map = [ map { [] } 0 .. $last_word_pos ];
1448 my $parsed_query_data = { map { ($$_{word_pos} => $_) } @$pos_data };
1450 for my $row (@$word_data) {
1451 my $wp = +$$row{word_pos};
1453 $skip_correct eq 't' and $$row{lev_distance} > 0
1454 and @{$$pos_to_word_map[$wp]}
1455 and $$pos_to_word_map[$wp][0]{lev_distance} == 0
1457 push @{$$pos_to_word_map[$$row{word_pos}]}, $row;
1460 gen_step($max_per_word, $pos_to_word_map, $parsed_query_data, $last_word_pos);
1463 # -----------------------------
1465 my $max_words = shift;
1467 my $pos_data = shift;
1468 my $last_pos = shift;
1469 my $prefix = shift || '';
1470 my $test_prefix = shift || '';
1471 my $current_pos = shift || 0;
1474 for my $sugg ( @{$$data[$current_pos]} ) {
1475 my $was_inside_phrase = 0;
1476 my $now_inside_phrase = 0;
1478 my $word = $$sugg{suggestion};
1481 my $prev_phrase = $$pos_data{$current_pos - 1}{phrase_in_input_pos};
1482 my $curr_phrase = $$pos_data{$current_pos}{phrase_in_input_pos};
1483 my $next_phrase = $$pos_data{$current_pos + 1}{phrase_in_input_pos};
1485 $now_inside_phrase++ if (defined($next_phrase) and $curr_phrase == $next_phrase);
1486 $was_inside_phrase++ if (defined($prev_phrase) and $curr_phrase == $prev_phrase);
1488 my $string = $prefix;
1489 $string .= ' ' if $string;
1491 if (!$was_inside_phrase) { # might be starting a phrase?
1492 $string .= '-' if ($$pos_data{$current_pos}{negated} eq 't');
1493 if ($now_inside_phrase) { # we are! add the double-quote
1497 } else { # definitely were in a phrase
1499 if (!$now_inside_phrase) { # we are not any longer, add the double-quote
1504 my $test_string = $test_prefix;
1505 if ($current_pos > 0) { # have something already, need joiner
1506 $test_string .= $curr_phrase == $prev_phrase ? ' <-> ' : ' & ';
1508 $test_string .= '!' if ($$pos_data{$current_pos}{negated} eq 't');
1509 $test_string .= $word;
1511 if ($current_pos == $last_pos) {
1512 return_next {suggestion => $string, test => $test_string};
1514 gen_step($max_words, $data, $pos_data, $last_pos, $string, $test_string, $current_pos + 1);
1517 last if ($max_words and $word_count >= $max_words);
1520 $f$ LANGUAGE PLPERLU IMMUTABLE;
1523 CREATE FUNCTION search.symspell_lookup (
1526 verbosity INT DEFAULT NULL,
1527 xfer_case BOOL DEFAULT NULL,
1528 count_threshold INT DEFAULT NULL,
1529 soundex_weight INT DEFAULT NULL,
1530 pg_trgm_weight INT DEFAULT NULL,
1531 kbdist_weight INT DEFAULT NULL
1532 ) RETURNS SETOF search.symspell_lookup_output AS $F$
1537 edit_list TEXT[] := '{}';
1538 seen_list TEXT[] := '{}';
1539 output search.symspell_lookup_output;
1540 output_list search.symspell_lookup_output[];
1548 smallest_ed INT := -1;
1550 c_symspell_suggestion_verbosity INT;
1551 c_min_suggestion_use_threshold INT;
1552 c_soundex_weight INT;
1553 c_pg_trgm_weight INT;
1554 c_keyboard_distance_weight INT;
1555 c_symspell_transfer_case BOOL;
1558 SELECT cmc.min_suggestion_use_threshold,
1561 cmc.keyboard_distance_weight,
1562 cmc.symspell_transfer_case,
1563 cmc.symspell_suggestion_verbosity
1564 INTO c_min_suggestion_use_threshold,
1567 c_keyboard_distance_weight,
1568 c_symspell_transfer_case,
1569 c_symspell_suggestion_verbosity
1570 FROM config.metabib_class cmc
1571 WHERE cmc.name = search_class;
1573 c_min_suggestion_use_threshold := COALESCE(count_threshold,c_min_suggestion_use_threshold);
1574 c_symspell_transfer_case := COALESCE(xfer_case,c_symspell_transfer_case);
1575 c_symspell_suggestion_verbosity := COALESCE(verbosity,c_symspell_suggestion_verbosity);
1576 c_soundex_weight := COALESCE(soundex_weight,c_soundex_weight);
1577 c_pg_trgm_weight := COALESCE(pg_trgm_weight,c_pg_trgm_weight);
1578 c_keyboard_distance_weight := COALESCE(kbdist_weight,c_keyboard_distance_weight);
1580 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
1581 prefix_length := COALESCE(prefix_length, 6);
1583 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
1584 maxED := COALESCE(maxED, 3);
1586 -- XXX This should get some more thought ... maybe search_normalize?
1587 word_list := ARRAY_AGG(x.word) FROM search.query_parse_positions(raw_input) x;
1589 -- Common case exact match test for preformance
1590 IF c_symspell_suggestion_verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
1592 'SELECT '||search_class||'_suggestions AS suggestions,
1593 '||search_class||'_count AS count,
1595 FROM search.symspell_dictionary
1596 WHERE prefix_key = $1
1597 AND '||search_class||'_count >= $2
1598 AND '||search_class||'_suggestions @> ARRAY[$1]'
1599 INTO entry USING evergreen.lowercase(word_list[1]), c_min_suggestion_use_threshold;
1600 IF entry.prefix_key IS NOT NULL THEN
1601 output.lev_distance := 0; -- definitionally
1602 output.prefix_key := entry.prefix_key;
1603 output.prefix_key_count := entry.count;
1604 output.suggestion_count := entry.count;
1605 output.input := word_list[1];
1606 IF c_symspell_transfer_case THEN
1607 output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
1609 output.suggestion := entry.prefix_key;
1611 output.norm_input := entry.prefix_key;
1612 output.qwerty_kb_match := 1;
1613 output.pg_trgm_sim := 1;
1614 output.soundex_sim := 1;
1621 FOREACH word IN ARRAY word_list LOOP
1623 input := evergreen.lowercase(word);
1625 IF CHARACTER_LENGTH(input) > prefix_length THEN
1626 prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
1627 edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED);
1629 edit_list := input || search.symspell_generate_edits(input, 1, maxED);
1632 SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
1634 output_list := '{}';
1639 FOREACH entry_key IN ARRAY edit_list LOOP
1641 IF global_ed IS NOT NULL THEN
1642 smallest_ed := global_ed;
1644 FOR entry IN EXECUTE
1645 'SELECT '||search_class||'_suggestions AS suggestions,
1646 '||search_class||'_count AS count,
1648 FROM search.symspell_dictionary
1649 WHERE prefix_key = $1
1650 AND '||search_class||'_suggestions IS NOT NULL'
1653 FOREACH sugg IN ARRAY entry.suggestions LOOP
1654 IF NOT seen_list @> ARRAY[sugg] THEN
1655 seen_list := seen_list || sugg;
1656 IF input = sugg THEN -- exact match, no need to spend time on a call
1657 output.lev_distance := 0;
1658 output.suggestion_count = entry.count;
1659 ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN
1660 -- They are definitionally too different to consider, just move on.
1663 --output.lev_distance := levenshtein_less_equal(
1664 output.lev_distance := evergreen.levenshtein_damerau_edistance(
1669 IF output.lev_distance < 0 THEN
1670 -- The Perl module returns -1 for "more distant than max".
1671 output.lev_distance := maxED + 1;
1672 -- This short-circuit's the count test below for speed, bypassing
1673 -- a couple useless tests.
1674 output.suggestion_count := -1;
1676 EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1'
1677 INTO output.suggestion_count USING sugg;
1681 -- The caller passes a minimum suggestion count threshold (or uses
1682 -- the default of 0) and if the suggestion has that many or less uses
1683 -- then we move on to the next suggestion, since this one is too rare.
1684 CONTINUE WHEN output.suggestion_count < c_min_suggestion_use_threshold;
1686 -- Track the smallest edit distance among suggestions from this prefix key.
1687 IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
1688 smallest_ed := output.lev_distance;
1691 -- Track the smallest edit distance for all prefix keys for this word.
1692 IF global_ed IS NULL OR smallest_ed < global_ed THEN
1693 global_ed = smallest_ed;
1696 -- Only proceed if the edit distance is <= the max for the dictionary.
1697 IF output.lev_distance <= maxED THEN
1698 IF output.lev_distance > global_ed AND c_symspell_suggestion_verbosity <= 1 THEN
1699 -- Lev distance is our main similarity measure. While
1700 -- trgm or soundex similarity could be the main filter,
1701 -- Lev is both language agnostic and faster.
1703 -- Here we will skip suggestions that have a longer edit distance
1704 -- than the shortest we've already found. This is simply an
1705 -- optimization that allows us to avoid further processing
1706 -- of this entry. It would be filtered out later.
1711 -- If we have an exact match on the suggestion key we can also avoid
1712 -- some function calls.
1713 IF output.lev_distance = 0 THEN
1714 output.qwerty_kb_match := 1;
1715 output.pg_trgm_sim := 1;
1716 output.soundex_sim := 1;
1718 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
1719 output.pg_trgm_sim := similarity(input, sugg);
1720 output.soundex_sim := difference(input, sugg) / 4.0;
1723 -- Fill in some fields
1724 IF c_symspell_transfer_case THEN
1725 output.suggestion := search.symspell_transfer_casing(word, sugg);
1727 output.suggestion := sugg;
1729 output.prefix_key := entry.prefix_key;
1730 output.prefix_key_count := entry.count;
1731 output.input := word;
1732 output.norm_input := input;
1733 output.word_pos := w_pos;
1735 -- We can't "cache" a set of generated records directly, so
1736 -- here we build up an array of search.symspell_lookup_output
1737 -- records that we can revivicate later as a table using UNNEST().
1738 output_list := output_list || output;
1740 EXIT entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 0; -- exact match early exit
1741 CONTINUE entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 1; -- exact match early jump to the next key
1742 END IF; -- maxED test
1743 END IF; -- suggestion not seen test
1744 END LOOP; -- loop over suggestions
1745 END LOOP; -- loop over entries
1746 END LOOP; -- loop over entry_keys
1748 -- Now we're done examining this word
1749 IF c_symspell_suggestion_verbosity = 0 THEN
1750 -- Return the "best" suggestion from the smallest edit
1751 -- distance group. We define best based on the weighting
1752 -- of the non-lev similarity measures and use the suggestion
1753 -- use count to break ties.
1755 SELECT * FROM UNNEST(output_list)
1756 ORDER BY lev_distance,
1757 (soundex_sim * c_soundex_weight)
1758 + (pg_trgm_sim * c_pg_trgm_weight)
1759 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
1760 suggestion_count DESC
1762 ELSIF c_symspell_suggestion_verbosity = 1 THEN
1763 -- Return all suggestions from the smallest
1764 -- edit distance group.
1766 SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
1767 ORDER BY (soundex_sim * c_soundex_weight)
1768 + (pg_trgm_sim * c_pg_trgm_weight)
1769 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
1770 suggestion_count DESC;
1771 ELSIF c_symspell_suggestion_verbosity = 2 THEN
1772 -- Return everything we find, along with relevant stats
1774 SELECT * FROM UNNEST(output_list)
1775 ORDER BY lev_distance,
1776 (soundex_sim * c_soundex_weight)
1777 + (pg_trgm_sim * c_pg_trgm_weight)
1778 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
1779 suggestion_count DESC;
1780 ELSIF c_symspell_suggestion_verbosity = 3 THEN
1781 -- Return everything we find from the two smallest edit distance groups
1783 SELECT * FROM UNNEST(output_list)
1784 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
1785 ORDER BY lev_distance,
1786 (soundex_sim * c_soundex_weight)
1787 + (pg_trgm_sim * c_pg_trgm_weight)
1788 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
1789 suggestion_count DESC;
1790 ELSIF c_symspell_suggestion_verbosity = 4 THEN
1791 -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
1793 SELECT * FROM UNNEST(output_list)
1794 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
1795 ORDER BY lev_distance,
1796 (soundex_sim * c_soundex_weight)
1797 + (pg_trgm_sim * c_pg_trgm_weight)
1798 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
1799 suggestion_count DESC;
1801 END LOOP; -- loop over words
1803 $F$ LANGUAGE PLPGSQL;
1805 CREATE OR REPLACE FUNCTION search.symspell_suggest (
1808 search_fields TEXT[] DEFAULT '{}',
1809 max_ed INT DEFAULT NULL, -- per word, on average, between norm input and suggestion
1810 verbosity INT DEFAULT NULL, -- 0=Best only; 1=
1811 skip_correct BOOL DEFAULT NULL, -- only suggest replacement words for misspellings?
1812 max_word_opts INT DEFAULT NULL, -- 0 means all combinations, probably want to restrict?
1813 count_threshold INT DEFAULT NULL -- min count of records using the terms
1814 ) RETURNS SETOF search.symspell_lookup_output AS $F$
1816 sugg_set search.symspell_lookup_output[];
1817 parsed_query_set search.query_parse_position[];
1821 current_sugg RECORD;
1826 query_part TEXT := '';
1827 output search.symspell_lookup_output;
1828 c_skip_correct BOOL;
1829 c_variant_authority_suggestion BOOL;
1830 c_symspell_transfer_case BOOL;
1831 c_authority_class_restrict BOOL;
1832 c_min_suggestion_use_threshold INT;
1833 c_soundex_weight INT;
1834 c_pg_trgm_weight INT;
1835 c_keyboard_distance_weight INT;
1836 c_suggestion_word_option_count INT;
1837 c_symspell_suggestion_verbosity INT;
1838 c_max_phrase_edit_distance INT;
1842 SELECT cmc.min_suggestion_use_threshold,
1845 cmc.keyboard_distance_weight,
1846 cmc.suggestion_word_option_count,
1847 cmc.symspell_suggestion_verbosity,
1848 cmc.symspell_skip_correct,
1849 cmc.symspell_transfer_case,
1850 cmc.max_phrase_edit_distance,
1851 cmc.variant_authority_suggestion,
1853 INTO c_min_suggestion_use_threshold,
1856 c_keyboard_distance_weight,
1857 c_suggestion_word_option_count,
1858 c_symspell_suggestion_verbosity,
1860 c_symspell_transfer_case,
1861 c_max_phrase_edit_distance,
1862 c_variant_authority_suggestion,
1863 c_authority_class_restrict
1864 FROM config.metabib_class cmc
1865 WHERE cmc.name = search_class;
1868 -- Set up variables to use at run time based on params and settings
1869 c_min_suggestion_use_threshold := COALESCE(count_threshold,c_min_suggestion_use_threshold);
1870 c_max_phrase_edit_distance := COALESCE(max_ed,c_max_phrase_edit_distance);
1871 c_symspell_suggestion_verbosity := COALESCE(verbosity,c_symspell_suggestion_verbosity);
1872 c_suggestion_word_option_count := COALESCE(max_word_opts,c_suggestion_word_option_count);
1873 c_skip_correct := COALESCE(skip_correct,c_skip_correct);
1876 x ORDER BY x.word_pos,
1878 (x.soundex_sim * c_soundex_weight)
1879 + (x.pg_trgm_sim * c_pg_trgm_weight)
1880 + (x.qwerty_kb_match * c_keyboard_distance_weight) DESC,
1881 x.suggestion_count DESC
1883 FROM search.symspell_lookup(
1886 c_symspell_suggestion_verbosity,
1887 c_symspell_transfer_case,
1888 c_min_suggestion_use_threshold,
1891 c_keyboard_distance_weight
1893 WHERE x.lev_distance <= c_max_phrase_edit_distance;
1895 SELECT ARRAY_AGG(x) INTO parsed_query_set FROM search.query_parse_positions(raw_input) x;
1897 IF search_fields IS NOT NULL AND CARDINALITY(search_fields) > 0 THEN
1898 SELECT STRING_AGG(id::TEXT,',') INTO query_part FROM config.metabib_field WHERE name = ANY (search_fields);
1899 IF CHARACTER_LENGTH(query_part) > 0 THEN query_part := 'AND field IN ('||query_part||')'; END IF;
1902 SELECT STRING_AGG(word,' ') INTO norm_input FROM search.query_parse_positions(evergreen.lowercase(raw_input)) WHERE NOT negated;
1903 EXECUTE 'SELECT COUNT(DISTINCT source) AS recs
1904 FROM metabib.' || search_class || '_field_entry
1905 WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part
1906 INTO norm_count USING norm_input;
1908 SELECT STRING_AGG(word,' ') INTO norm_test FROM UNNEST(parsed_query_set);
1911 FROM search.symspell_generate_combined_suggestions(
1915 c_suggestion_word_option_count
1918 EXECUTE 'SELECT COUNT(DISTINCT source) AS recs
1919 FROM metabib.' || search_class || '_field_entry
1920 WHERE index_vector @@ to_tsquery($$simple$$,$1)' || query_part
1921 INTO entry USING current_sugg.test;
1922 SELECT STRING_AGG(word,' ') INTO norm_sugg FROM search.query_parse_positions(current_sugg.suggestion);
1923 IF entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR norm_sugg <> norm_input) THEN
1925 output.input := raw_input;
1926 output.norm_input := norm_input;
1927 output.suggestion := current_sugg.suggestion;
1928 output.suggestion_count := entry.recs;
1929 output.prefix_key := NULL;
1930 output.prefix_key_count := norm_count.recs;
1932 output.lev_distance := NULLIF(evergreen.levenshtein_damerau_edistance(norm_test, norm_sugg, c_max_phrase_edit_distance * CARDINALITY(parsed_query_set)), -1);
1933 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(norm_test, norm_sugg);
1934 output.pg_trgm_sim := similarity(norm_input, norm_sugg);
1935 output.soundex_sim := difference(norm_input, norm_sugg) / 4.0;
1940 IF c_variant_authority_suggestion THEN
1942 SELECT DISTINCT m.value AS prefix_key,
1943 m.sort_value AS suggestion,
1944 v.value as raw_input,
1945 v.sort_value as norm_input
1946 FROM authority.simple_heading v
1947 JOIN authority.control_set_authority_field csaf ON (csaf.id = v.atag)
1948 JOIN authority.heading_field f ON (f.id = csaf.heading_field)
1949 JOIN authority.simple_heading m ON (m.record = v.record AND csaf.main_entry = m.atag)
1950 JOIN authority.control_set_bib_field csbf ON (csbf.authority_field = csaf.main_entry)
1951 JOIN authority.control_set_bib_field_metabib_field_map csbfmfm ON (csbf.id = csbfmfm.bib_field)
1952 JOIN config.metabib_field cmf ON (
1953 csbfmfm.metabib_field = cmf.id
1954 AND (c_authority_class_restrict IS FALSE OR cmf.field_class = search_class)
1955 AND (search_fields = '{}'::TEXT[] OR cmf.name = ANY (search_fields))
1957 WHERE v.sort_value = norm_sugg
1959 EXECUTE 'SELECT COUNT(DISTINCT source) AS recs
1960 FROM metabib.' || search_class || '_field_entry
1961 WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part
1962 INTO auth_entry USING auth_sugg.suggestion;
1963 IF auth_entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR auth_sugg.suggestion <> norm_input) THEN
1964 output.input := auth_sugg.raw_input;
1965 output.norm_input := auth_sugg.norm_input;
1966 output.suggestion := auth_sugg.suggestion;
1967 output.prefix_key := auth_sugg.prefix_key;
1968 output.suggestion_count := auth_entry.recs * -1; -- negative value here
1970 output.lev_distance := 0;
1971 output.qwerty_kb_match := 0;
1972 output.pg_trgm_sim := 0;
1973 output.soundex_sim := 0;
1983 $F$ LANGUAGE PLPGSQL;
1985 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
1988 no_limit BOOL DEFAULT FALSE,
1989 prefix_length INT DEFAULT 6,
1991 ) RETURNS SETOF search.symspell_dictionary AS $F$
1996 entry search.symspell_dictionary%ROWTYPE;
2000 IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
2001 key := SUBSTRING(key FROM 1 FOR prefix_length);
2002 key_list := ARRAY[raw_input, key];
2004 key_list := ARRAY[key];
2007 FOREACH del_key IN ARRAY key_list LOOP
2009 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
2011 entry.prefix_key := del_key;
2013 entry.keyword_count := 0;
2014 entry.title_count := 0;
2015 entry.author_count := 0;
2016 entry.subject_count := 0;
2017 entry.series_count := 0;
2018 entry.identifier_count := 0;
2020 entry.keyword_suggestions := '{}';
2021 entry.title_suggestions := '{}';
2022 entry.author_suggestions := '{}';
2023 entry.subject_suggestions := '{}';
2024 entry.series_suggestions := '{}';
2025 entry.identifier_suggestions := '{}';
2027 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
2028 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
2029 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
2030 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
2031 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
2032 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
2033 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
2035 IF del_key = raw_input THEN
2036 IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
2037 IF source_class = 'title' THEN entry.title_count := 1; END IF;
2038 IF source_class = 'author' THEN entry.author_count := 1; END IF;
2039 IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
2040 IF source_class = 'series' THEN entry.series_count := 1; END IF;
2041 IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
2047 FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
2050 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
2051 -- skip suggestions that are already too long for the prefix key
2052 CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
2054 entry.keyword_suggestions := '{}';
2055 entry.title_suggestions := '{}';
2056 entry.author_suggestions := '{}';
2057 entry.subject_suggestions := '{}';
2058 entry.series_suggestions := '{}';
2059 entry.identifier_suggestions := '{}';
2061 IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
2062 IF source_class = 'title' THEN entry.title_count := 0; END IF;
2063 IF source_class = 'author' THEN entry.author_count := 0; END IF;
2064 IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
2065 IF source_class = 'series' THEN entry.series_count := 0; END IF;
2066 IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
2068 entry.prefix_key := del_key;
2070 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
2071 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
2072 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
2073 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
2074 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
2075 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
2076 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
2082 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
2084 CREATE OR REPLACE FUNCTION search.symspell_build_entries (
2087 old_input TEXT DEFAULT NULL,
2088 include_phrases BOOL DEFAULT FALSE
2089 ) RETURNS SETOF search.symspell_dictionary AS $F$
2096 entry search.symspell_dictionary;
2098 IF full_input IS NOT NULL THEN
2099 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
2100 prefix_length := COALESCE(prefix_length, 6);
2102 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
2103 maxED := COALESCE(maxED, 3);
2105 input := evergreen.lowercase(full_input);
2106 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
2107 IF word_list IS NULL THEN
2111 IF CARDINALITY(word_list) > 1 AND include_phrases THEN
2112 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
2115 FOREACH word IN ARRAY word_list LOOP
2116 -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
2117 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
2118 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
2122 IF old_input IS NOT NULL THEN
2123 input := evergreen.lowercase(old_input);
2125 FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
2126 -- similarly skip words that have 5 or more digits here to
2127 -- avoid adding erroneous prefix deletion entries to the dictionary
2128 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
2129 entry.prefix_key := word;
2131 entry.keyword_count := 0;
2132 entry.title_count := 0;
2133 entry.author_count := 0;
2134 entry.subject_count := 0;
2135 entry.series_count := 0;
2136 entry.identifier_count := 0;
2138 entry.keyword_suggestions := '{}';
2139 entry.title_suggestions := '{}';
2140 entry.author_suggestions := '{}';
2141 entry.subject_suggestions := '{}';
2142 entry.series_suggestions := '{}';
2143 entry.identifier_suggestions := '{}';
2145 IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
2146 IF source_class = 'title' THEN entry.title_count := -1; END IF;
2147 IF source_class = 'author' THEN entry.author_count := -1; END IF;
2148 IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
2149 IF source_class = 'series' THEN entry.series_count := -1; END IF;
2150 IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
2156 $F$ LANGUAGE PLPGSQL;
2158 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
2161 old_input TEXT DEFAULT NULL,
2162 include_phrases BOOL DEFAULT FALSE
2163 ) RETURNS SETOF search.symspell_dictionary AS $F$
2166 conflict_entry RECORD;
2169 IF full_input = old_input THEN -- neither NULL, and are the same
2173 FOR new_entry IN EXECUTE $q$
2177 FROM (SELECT prefix_key,
2178 ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
2179 SUM($q$ || source_class || $q$_count) count
2180 FROM search.symspell_build_entries($1, $2, $3, $4)
2182 $q$ USING full_input, source_class, old_input, include_phrases
2186 $q$ || source_class || $q$_suggestions suggestions,
2187 $q$ || source_class || $q$_count count
2188 FROM search.symspell_dictionary
2189 WHERE prefix_key = $1 $q$
2191 USING new_entry.prefix_key;
2193 IF new_entry.count <> 0 THEN -- Real word, and count changed
2194 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
2195 IF conflict_entry.count > 0 THEN -- it's a real word
2196 RETURN QUERY EXECUTE $q$
2197 UPDATE search.symspell_dictionary
2198 SET $q$ || source_class || $q$_count = $2
2199 WHERE prefix_key = $1
2201 USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
2202 ELSE -- it was a prefix key or delete-emptied word before
2203 IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
2204 RETURN QUERY EXECUTE $q$
2205 UPDATE search.symspell_dictionary
2206 SET $q$ || source_class || $q$_count = $2
2207 WHERE prefix_key = $1
2209 USING new_entry.prefix_key, GREATEST(0, new_entry.count);
2210 ELSE -- new suggestion!
2211 RETURN QUERY EXECUTE $q$
2212 UPDATE search.symspell_dictionary
2213 SET $q$ || source_class || $q$_count = $2,
2214 $q$ || source_class || $q$_suggestions = $3
2215 WHERE prefix_key = $1
2217 USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
2221 -- We keep the on-conflict clause just in case...
2222 RETURN QUERY EXECUTE $q$
2223 INSERT INTO search.symspell_dictionary AS d (
2224 $q$ || source_class || $q$_count,
2226 $q$ || source_class || $q$_suggestions
2227 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
2228 UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
2229 $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
2231 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
2233 ELSE -- key only, or no change
2234 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
2235 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
2236 RETURN QUERY EXECUTE $q$
2237 UPDATE search.symspell_dictionary
2238 SET $q$ || source_class || $q$_suggestions = $2
2239 WHERE prefix_key = $1
2241 USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
2244 RETURN QUERY EXECUTE $q$
2245 INSERT INTO search.symspell_dictionary AS d (
2246 $q$ || source_class || $q$_count,
2248 $q$ || source_class || $q$_suggestions
2249 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
2250 UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
2252 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
2257 $F$ LANGUAGE PLPGSQL;
2259 CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
2262 new_value TEXT := NULL;
2263 old_value TEXT := NULL;
2266 IF TG_TABLE_SCHEMA = 'authority' THEN
2267 SELECT m.field_class INTO search_class
2268 FROM authority.control_set_auth_field_metabib_field_map_refs a
2269 JOIN config.metabib_field m ON (a.metabib_field=m.id)
2270 WHERE a.authority_field = NEW.atag;
2276 search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
2279 IF TG_OP IN ('INSERT', 'UPDATE') THEN
2280 new_value := NEW.value;
2283 IF TG_OP IN ('DELETE', 'UPDATE') THEN
2284 old_value := OLD.value;
2287 IF new_value = old_value THEN
2290 INSERT INTO search.symspell_dictionary_updates
2291 SELECT txid_current(), *
2292 FROM search.symspell_build_entries(
2299 -- PERFORM * FROM search.symspell_build_and_merge_entries(new_value, search_class, old_value);
2301 RETURN NULL; -- always fired AFTER
2303 $f$ LANGUAGE PLPGSQL;
2305 CREATE TRIGGER maintain_symspell_entries_tgr
2306 AFTER INSERT OR UPDATE OR DELETE ON metabib.title_field_entry
2307 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2309 CREATE TRIGGER maintain_symspell_entries_tgr
2310 AFTER INSERT OR UPDATE OR DELETE ON metabib.author_field_entry
2311 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2313 CREATE TRIGGER maintain_symspell_entries_tgr
2314 AFTER INSERT OR UPDATE OR DELETE ON metabib.subject_field_entry
2315 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2317 CREATE TRIGGER maintain_symspell_entries_tgr
2318 AFTER INSERT OR UPDATE OR DELETE ON metabib.series_field_entry
2319 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2321 CREATE TRIGGER maintain_symspell_entries_tgr
2322 AFTER INSERT OR UPDATE OR DELETE ON metabib.keyword_field_entry
2323 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2325 CREATE TRIGGER maintain_symspell_entries_tgr
2326 AFTER INSERT OR UPDATE OR DELETE ON metabib.identifier_field_entry
2327 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();
2329 CREATE TRIGGER maintain_symspell_entries_tgr
2330 AFTER INSERT OR UPDATE OR DELETE ON authority.simple_heading
2331 FOR EACH ROW EXECUTE PROCEDURE search.symspell_maintain_entries();