3 SELECT evergreen.upgrade_deps_block_check('1366', :eg_version);
5 ALTER TABLE config.metabib_class
6 ADD COLUMN IF NOT EXISTS variant_authority_suggestion BOOL NOT NULL DEFAULT TRUE,
7 ADD COLUMN IF NOT EXISTS symspell_transfer_case BOOL NOT NULL DEFAULT TRUE,
8 ADD COLUMN IF NOT EXISTS symspell_skip_correct BOOL NOT NULL DEFAULT FALSE,
9 ADD COLUMN IF NOT EXISTS symspell_suggestion_verbosity INT NOT NULL DEFAULT 2,
10 ADD COLUMN IF NOT EXISTS max_phrase_edit_distance INT NOT NULL DEFAULT 2,
11 ADD COLUMN IF NOT EXISTS suggestion_word_option_count INT NOT NULL DEFAULT 5,
12 ADD COLUMN IF NOT EXISTS max_suggestions INT NOT NULL DEFAULT -1,
13 ADD COLUMN IF NOT EXISTS low_result_threshold INT NOT NULL DEFAULT 0,
14 ADD COLUMN IF NOT EXISTS min_suggestion_use_threshold INT NOT NULL DEFAULT 1,
15 ADD COLUMN IF NOT EXISTS soundex_weight INT NOT NULL DEFAULT 0,
16 ADD COLUMN IF NOT EXISTS pg_trgm_weight INT NOT NULL DEFAULT 0,
17 ADD COLUMN IF NOT EXISTS keyboard_distance_weight INT NOT NULL DEFAULT 0;
20 /* -- may not need these 2 functions
21 CREATE OR REPLACE FUNCTION search.symspell_parse_positive_words ( phrase TEXT )
22 RETURNS SETOF TEXT AS $F$
24 FROM (SELECT UNNEST(x), ROW_NUMBER() OVER ()
25 FROM regexp_matches($1, '(?<!-)\+?([[:alnum:]]+''*[[:alnum:]]*)', 'g') x
27 WHERE UNNEST IS NOT NULL
29 $F$ LANGUAGE SQL STRICT IMMUTABLE;
31 CREATE OR REPLACE FUNCTION search.symspell_parse_positive_phrases ( phrase TEXT )
32 RETURNS SETOF TEXT AS $F$
33 SELECT BTRIM(BTRIM(UNNEST),'"')
34 FROM (SELECT UNNEST(x), ROW_NUMBER() OVER ()
35 FROM regexp_matches($1, '(?:^|\s+)(?:(-?"[^"]+")|(-?\+?[[:alnum:]]+''*?[[:alnum:]]*?))', 'g') x
37 WHERE UNNEST IS NOT NULL AND UNNEST NOT LIKE '-%'
39 $F$ LANGUAGE SQL STRICT IMMUTABLE;
42 CREATE OR REPLACE FUNCTION search.symspell_parse_words ( phrase TEXT )
43 RETURNS SETOF TEXT AS $F$
45 FROM (SELECT UNNEST(x), ROW_NUMBER() OVER ()
46 FROM regexp_matches($1, '(?:^|\s+)((?:-|\+)?[[:alnum:]]+''*[[:alnum:]]*)', 'g') x
48 WHERE UNNEST IS NOT NULL
50 $F$ LANGUAGE SQL STRICT IMMUTABLE;
52 CREATE OR REPLACE FUNCTION search.distribute_phrase_sign (input TEXT) RETURNS TEXT AS $f$
59 IF output ~ '^(?:-|\+)' THEN
60 phrase_sign := SUBSTRING(input FROM 1 FOR 1);
61 output := SUBSTRING(output FROM 2);
64 IF output LIKE '"%"' THEN
65 IF phrase_sign IS NULL THEN
68 output := BTRIM(output,'"');
71 IF phrase_sign IS NOT NULL THEN
72 RETURN REGEXP_REPLACE(output,'(^|\s+)(?=[[:alnum:]])','\1'||phrase_sign,'g');
77 $f$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
79 CREATE OR REPLACE FUNCTION search.query_parse_phrases ( phrase TEXT )
80 RETURNS SETOF TEXT AS $F$
81 SELECT search.distribute_phrase_sign(UNNEST)
82 FROM (SELECT UNNEST(x), ROW_NUMBER() OVER ()
83 FROM regexp_matches($1, '(?:^|\s+)(?:((?:-|\+)?"[^"]+")|((?:-|\+)?[[:alnum:]]+''*[[:alnum:]]*))', 'g') x
85 WHERE UNNEST IS NOT NULL
87 $F$ LANGUAGE SQL STRICT IMMUTABLE;
89 CREATE TYPE search.query_parse_position AS (
92 phrase_in_input_pos INT,
93 word_in_phrase_pos INT,
98 CREATE OR REPLACE FUNCTION search.query_parse_positions ( raw_input TEXT )
99 RETURNS SETOF search.query_parse_position AS $F$
109 FOR curr_phrase IN SELECT x FROM search.query_parse_phrases(raw_input) x LOOP
111 FOR curr_word IN SELECT x FROM search.symspell_parse_words(curr_phrase) x LOOP
114 IF curr_word ~ '^(?:-|\+)' THEN
116 IF curr_word LIKE '-%' THEN
119 curr_word := SUBSTRING(curr_word FROM 2);
121 RETURN QUERY SELECT curr_word, pos, phrase_pos, word_pos, neg, ex;
122 word_pos := word_pos + 1;
125 phrase_pos := phrase_pos + 1;
129 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
132 select suggestion as sugg,
133 suggestion_count as scount,
136 prefix_key_count as ncount,
141 from search.symspell_suggest(
142 'Cedenzas (2) for Mosart''s Piano concerto',
147 where lev_distance is not null
148 order by lev_distance,
149 suggestion_count desc,
155 select * from search.symspell_suggest('piano concerto -jaz','subject','{}',2,2,false,4) order by lev_distance, soundex_sim desc, pg_trgm_sim desc, qwerty_kb_match desc;
158 CREATE OR REPLACE FUNCTION search.symspell_suggest (
161 search_fields TEXT[] DEFAULT '{}',
162 max_ed INT DEFAULT NULL, -- per word, on average, between norm input and suggestion
163 verbosity INT DEFAULT NULL, -- 0=Best only; 1=
164 skip_correct BOOL DEFAULT NULL, -- only suggest replacement words for misspellings?
165 max_word_opts INT DEFAULT NULL, -- 0 means all combinations, probably want to restrict?
166 count_threshold INT DEFAULT NULL -- min count of records using the terms
167 ) RETURNS SETOF search.symspell_lookup_output AS $F$
169 sugg_set search.symspell_lookup_output[];
170 parsed_query_set search.query_parse_position[];
179 query_part TEXT := '';
180 output search.symspell_lookup_output;
182 c_variant_authority_suggestion BOOL;
183 c_symspell_transfer_case BOOL;
184 c_authority_class_restrict BOOL;
185 c_min_suggestion_use_threshold INT;
186 c_soundex_weight INT;
187 c_pg_trgm_weight INT;
188 c_keyboard_distance_weight INT;
189 c_suggestion_word_option_count INT;
190 c_symspell_suggestion_verbosity INT;
191 c_max_phrase_edit_distance INT;
195 SELECT cmc.min_suggestion_use_threshold,
198 cmc.keyboard_distance_weight,
199 cmc.suggestion_word_option_count,
200 cmc.symspell_suggestion_verbosity,
201 cmc.symspell_skip_correct,
202 cmc.symspell_transfer_case,
203 cmc.max_phrase_edit_distance,
204 cmc.variant_authority_suggestion,
206 INTO c_min_suggestion_use_threshold,
209 c_keyboard_distance_weight,
210 c_suggestion_word_option_count,
211 c_symspell_suggestion_verbosity,
213 c_symspell_transfer_case,
214 c_max_phrase_edit_distance,
215 c_variant_authority_suggestion,
216 c_authority_class_restrict
217 FROM config.metabib_class cmc
218 WHERE cmc.name = search_class;
221 -- Set up variables to use at run time based on params and settings
222 c_min_suggestion_use_threshold := COALESCE(count_threshold,c_min_suggestion_use_threshold);
223 c_max_phrase_edit_distance := COALESCE(max_ed,c_max_phrase_edit_distance);
224 c_symspell_suggestion_verbosity := COALESCE(verbosity,c_symspell_suggestion_verbosity);
225 c_suggestion_word_option_count := COALESCE(max_word_opts,c_suggestion_word_option_count);
226 c_skip_correct := COALESCE(skip_correct,c_skip_correct);
229 x ORDER BY x.word_pos,
231 (x.soundex_sim * c_soundex_weight)
232 + (x.pg_trgm_sim * c_pg_trgm_weight)
233 + (x.qwerty_kb_match * c_keyboard_distance_weight) DESC,
234 x.suggestion_count DESC
236 FROM search.symspell_lookup(
239 c_symspell_suggestion_verbosity,
240 c_symspell_transfer_case,
241 c_min_suggestion_use_threshold,
244 c_keyboard_distance_weight
246 WHERE x.lev_distance <= c_max_phrase_edit_distance;
248 SELECT ARRAY_AGG(x) INTO parsed_query_set FROM search.query_parse_positions(raw_input) x;
250 IF search_fields IS NOT NULL AND CARDINALITY(search_fields) > 0 THEN
251 SELECT STRING_AGG(id::TEXT,',') INTO query_part FROM config.metabib_field WHERE name = ANY (search_fields);
252 IF CHARACTER_LENGTH(query_part) > 0 THEN query_part := 'AND field IN ('||query_part||')'; END IF;
255 SELECT STRING_AGG(word,' ') INTO norm_input FROM search.query_parse_positions(evergreen.lowercase(raw_input)) WHERE NOT negated;
256 EXECUTE 'SELECT COUNT(DISTINCT source) AS recs
257 FROM metabib.' || search_class || '_field_entry
258 WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part
259 INTO norm_count USING norm_input;
261 SELECT STRING_AGG(word,' ') INTO norm_test FROM UNNEST(parsed_query_set);
264 FROM search.symspell_generate_combined_suggestions(
268 c_suggestion_word_option_count
271 EXECUTE 'SELECT COUNT(DISTINCT source) AS recs
272 FROM metabib.' || search_class || '_field_entry
273 WHERE index_vector @@ to_tsquery($$simple$$,$1)' || query_part
274 INTO entry USING current_sugg.test;
275 SELECT STRING_AGG(word,' ') INTO norm_sugg FROM search.query_parse_positions(current_sugg.suggestion);
276 IF entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR norm_sugg <> norm_input) THEN
278 output.input := raw_input;
279 output.norm_input := norm_input;
280 output.suggestion := current_sugg.suggestion;
281 output.suggestion_count := entry.recs;
282 output.prefix_key := NULL;
283 output.prefix_key_count := norm_count.recs;
285 output.lev_distance := NULLIF(evergreen.levenshtein_damerau_edistance(norm_test, norm_sugg, c_max_phrase_edit_distance * CARDINALITY(parsed_query_set)), -1);
286 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(norm_test, norm_sugg);
287 output.pg_trgm_sim := similarity(norm_input, norm_sugg);
288 output.soundex_sim := difference(norm_input, norm_sugg) / 4.0;
293 IF c_variant_authority_suggestion THEN
295 SELECT DISTINCT m.value AS prefix_key,
296 m.sort_value AS suggestion,
297 v.value as raw_input,
298 v.sort_value as norm_input
299 FROM authority.simple_heading v
300 JOIN authority.control_set_authority_field csaf ON (csaf.id = v.atag)
301 JOIN authority.heading_field f ON (f.id = csaf.heading_field)
302 JOIN authority.simple_heading m ON (m.record = v.record AND csaf.main_entry = m.atag)
303 JOIN authority.control_set_bib_field csbf ON (csbf.authority_field = csaf.main_entry)
304 JOIN authority.control_set_bib_field_metabib_field_map csbfmfm ON (csbf.id = csbfmfm.bib_field)
305 JOIN config.metabib_field cmf ON (
306 csbfmfm.metabib_field = cmf.id
307 AND (c_authority_class_restrict IS FALSE OR cmf.field_class = search_class)
308 AND (search_fields = '{}'::TEXT[] OR cmf.name = ANY (search_fields))
310 WHERE v.sort_value = norm_sugg
312 EXECUTE 'SELECT COUNT(DISTINCT source) AS recs
313 FROM metabib.' || search_class || '_field_entry
314 WHERE index_vector @@ plainto_tsquery($$simple$$,$1)' || query_part
315 INTO auth_entry USING auth_sugg.suggestion;
316 IF auth_entry.recs >= c_min_suggestion_use_threshold AND (norm_count.recs = 0 OR auth_sugg.suggestion <> norm_input) THEN
317 output.input := auth_sugg.raw_input;
318 output.norm_input := auth_sugg.norm_input;
319 output.suggestion := auth_sugg.suggestion;
320 output.prefix_key := auth_sugg.prefix_key;
321 output.suggestion_count := auth_entry.recs * -1; -- negative value here
323 output.lev_distance := 0;
324 output.qwerty_kb_match := 0;
325 output.pg_trgm_sim := 0;
326 output.soundex_sim := 0;
336 $F$ LANGUAGE PLPGSQL;
338 CREATE OR REPLACE FUNCTION search.symspell_generate_combined_suggestions(
339 word_data search.symspell_lookup_output[],
340 pos_data search.query_parse_position[],
341 skip_correct BOOL DEFAULT TRUE,
342 max_words INT DEFAULT 0
343 ) RETURNS TABLE (suggestion TEXT, test TEXT) AS $f$
344 my $word_data = shift;
345 my $pos_data = shift;
346 my $skip_correct = shift;
347 my $max_per_word = shift;
348 return undef unless (@$word_data and @$pos_data);
350 my $last_word_pos = $$word_data[-1]{word_pos};
351 my $pos_to_word_map = [ map { [] } 0 .. $last_word_pos ];
352 my $parsed_query_data = { map { ($$_{word_pos} => $_) } @$pos_data };
354 for my $row (@$word_data) {
355 my $wp = +$$row{word_pos};
357 $skip_correct eq 't' and $$row{lev_distance} > 0
358 and @{$$pos_to_word_map[$wp]}
359 and $$pos_to_word_map[$wp][0]{lev_distance} == 0
361 push @{$$pos_to_word_map[$$row{word_pos}]}, $row;
364 gen_step($max_per_word, $pos_to_word_map, $parsed_query_data, $last_word_pos);
367 # -----------------------------
369 my $max_words = shift;
371 my $pos_data = shift;
372 my $last_pos = shift;
373 my $prefix = shift || '';
374 my $test_prefix = shift || '';
375 my $current_pos = shift || 0;
378 for my $sugg ( @{$$data[$current_pos]} ) {
379 my $was_inside_phrase = 0;
380 my $now_inside_phrase = 0;
382 my $word = $$sugg{suggestion};
385 my $prev_phrase = $$pos_data{$current_pos - 1}{phrase_in_input_pos};
386 my $curr_phrase = $$pos_data{$current_pos}{phrase_in_input_pos};
387 my $next_phrase = $$pos_data{$current_pos + 1}{phrase_in_input_pos};
389 $now_inside_phrase++ if (defined($next_phrase) and $curr_phrase == $next_phrase);
390 $was_inside_phrase++ if (defined($prev_phrase) and $curr_phrase == $prev_phrase);
392 my $string = $prefix;
393 $string .= ' ' if $string;
395 if (!$was_inside_phrase) { # might be starting a phrase?
396 $string .= '-' if ($$pos_data{$current_pos}{negated} eq 't');
397 if ($now_inside_phrase) { # we are! add the double-quote
401 } else { # definitely were in a phrase
403 if (!$now_inside_phrase) { # we are not any longer, add the double-quote
408 my $test_string = $test_prefix;
409 if ($current_pos > 0) { # have something already, need joiner
410 $test_string .= $curr_phrase == $prev_phrase ? ' <-> ' : ' & ';
412 $test_string .= '!' if ($$pos_data{$current_pos}{negated} eq 't');
413 $test_string .= $word;
415 if ($current_pos == $last_pos) {
416 return_next {suggestion => $string, test => $test_string};
418 gen_step($max_words, $data, $pos_data, $last_pos, $string, $test_string, $current_pos + 1);
421 last if ($max_words and $word_count >= $max_words);
424 $f$ LANGUAGE PLPERLU IMMUTABLE;
426 -- Changing parameters, so we have to drop the old one first
427 DROP FUNCTION search.symspell_lookup;
428 CREATE FUNCTION search.symspell_lookup (
431 verbosity INT DEFAULT NULL,
432 xfer_case BOOL DEFAULT NULL,
433 count_threshold INT DEFAULT NULL,
434 soundex_weight INT DEFAULT NULL,
435 pg_trgm_weight INT DEFAULT NULL,
436 kbdist_weight INT DEFAULT NULL
437 ) RETURNS SETOF search.symspell_lookup_output AS $F$
442 edit_list TEXT[] := '{}';
443 seen_list TEXT[] := '{}';
444 output search.symspell_lookup_output;
445 output_list search.symspell_lookup_output[];
453 smallest_ed INT := -1;
455 c_symspell_suggestion_verbosity INT;
456 c_min_suggestion_use_threshold INT;
457 c_soundex_weight INT;
458 c_pg_trgm_weight INT;
459 c_keyboard_distance_weight INT;
460 c_symspell_transfer_case BOOL;
463 SELECT cmc.min_suggestion_use_threshold,
466 cmc.keyboard_distance_weight,
467 cmc.symspell_transfer_case,
468 cmc.symspell_suggestion_verbosity
469 INTO c_min_suggestion_use_threshold,
472 c_keyboard_distance_weight,
473 c_symspell_transfer_case,
474 c_symspell_suggestion_verbosity
475 FROM config.metabib_class cmc
476 WHERE cmc.name = search_class;
478 c_min_suggestion_use_threshold := COALESCE(count_threshold,c_min_suggestion_use_threshold);
479 c_symspell_transfer_case := COALESCE(xfer_case,c_symspell_transfer_case);
480 c_symspell_suggestion_verbosity := COALESCE(verbosity,c_symspell_suggestion_verbosity);
481 c_soundex_weight := COALESCE(soundex_weight,c_soundex_weight);
482 c_pg_trgm_weight := COALESCE(pg_trgm_weight,c_pg_trgm_weight);
483 c_keyboard_distance_weight := COALESCE(kbdist_weight,c_keyboard_distance_weight);
485 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
486 prefix_length := COALESCE(prefix_length, 6);
488 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
489 maxED := COALESCE(maxED, 3);
491 -- XXX This should get some more thought ... maybe search_normalize?
492 word_list := ARRAY_AGG(x.word) FROM search.query_parse_positions(raw_input) x;
494 -- Common case exact match test for preformance
495 IF c_symspell_suggestion_verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
497 'SELECT '||search_class||'_suggestions AS suggestions,
498 '||search_class||'_count AS count,
500 FROM search.symspell_dictionary
501 WHERE prefix_key = $1
502 AND '||search_class||'_count >= $2
503 AND '||search_class||'_suggestions @> ARRAY[$1]'
504 INTO entry USING evergreen.lowercase(word_list[1]), c_min_suggestion_use_threshold;
505 IF entry.prefix_key IS NOT NULL THEN
506 output.lev_distance := 0; -- definitionally
507 output.prefix_key := entry.prefix_key;
508 output.prefix_key_count := entry.count;
509 output.suggestion_count := entry.count;
510 output.input := word_list[1];
511 IF c_symspell_transfer_case THEN
512 output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
514 output.suggestion := entry.prefix_key;
516 output.norm_input := entry.prefix_key;
517 output.qwerty_kb_match := 1;
518 output.pg_trgm_sim := 1;
519 output.soundex_sim := 1;
526 FOREACH word IN ARRAY word_list LOOP
528 input := evergreen.lowercase(word);
530 IF CHARACTER_LENGTH(input) > prefix_length THEN
531 prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
532 edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED);
534 edit_list := input || search.symspell_generate_edits(input, 1, maxED);
537 SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
544 FOREACH entry_key IN ARRAY edit_list LOOP
546 IF global_ed IS NOT NULL THEN
547 smallest_ed := global_ed;
550 'SELECT '||search_class||'_suggestions AS suggestions,
551 '||search_class||'_count AS count,
553 FROM search.symspell_dictionary
554 WHERE prefix_key = $1
555 AND '||search_class||'_suggestions IS NOT NULL'
558 FOREACH sugg IN ARRAY entry.suggestions LOOP
559 IF NOT seen_list @> ARRAY[sugg] THEN
560 seen_list := seen_list || sugg;
561 IF input = sugg THEN -- exact match, no need to spend time on a call
562 output.lev_distance := 0;
563 output.suggestion_count = entry.count;
564 ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN
565 -- They are definitionally too different to consider, just move on.
568 --output.lev_distance := levenshtein_less_equal(
569 output.lev_distance := evergreen.levenshtein_damerau_edistance(
574 IF output.lev_distance < 0 THEN
575 -- The Perl module returns -1 for "more distant than max".
576 output.lev_distance := maxED + 1;
577 -- This short-circuit's the count test below for speed, bypassing
578 -- a couple useless tests.
579 output.suggestion_count := -1;
581 EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1'
582 INTO output.suggestion_count USING sugg;
586 -- The caller passes a minimum suggestion count threshold (or uses
587 -- the default of 0) and if the suggestion has that many or less uses
588 -- then we move on to the next suggestion, since this one is too rare.
589 CONTINUE WHEN output.suggestion_count < c_min_suggestion_use_threshold;
591 -- Track the smallest edit distance among suggestions from this prefix key.
592 IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
593 smallest_ed := output.lev_distance;
596 -- Track the smallest edit distance for all prefix keys for this word.
597 IF global_ed IS NULL OR smallest_ed < global_ed THEN
598 global_ed = smallest_ed;
601 -- Only proceed if the edit distance is <= the max for the dictionary.
602 IF output.lev_distance <= maxED THEN
603 IF output.lev_distance > global_ed AND c_symspell_suggestion_verbosity <= 1 THEN
604 -- Lev distance is our main similarity measure. While
605 -- trgm or soundex similarity could be the main filter,
606 -- Lev is both language agnostic and faster.
608 -- Here we will skip suggestions that have a longer edit distance
609 -- than the shortest we've already found. This is simply an
610 -- optimization that allows us to avoid further processing
611 -- of this entry. It would be filtered out later.
616 -- If we have an exact match on the suggestion key we can also avoid
617 -- some function calls.
618 IF output.lev_distance = 0 THEN
619 output.qwerty_kb_match := 1;
620 output.pg_trgm_sim := 1;
621 output.soundex_sim := 1;
623 output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
624 output.pg_trgm_sim := similarity(input, sugg);
625 output.soundex_sim := difference(input, sugg) / 4.0;
628 -- Fill in some fields
629 IF c_symspell_transfer_case THEN
630 output.suggestion := search.symspell_transfer_casing(word, sugg);
632 output.suggestion := sugg;
634 output.prefix_key := entry.prefix_key;
635 output.prefix_key_count := entry.count;
636 output.input := word;
637 output.norm_input := input;
638 output.word_pos := w_pos;
640 -- We can't "cache" a set of generated records directly, so
641 -- here we build up an array of search.symspell_lookup_output
642 -- records that we can revivicate later as a table using UNNEST().
643 output_list := output_list || output;
645 EXIT entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 0; -- exact match early exit
646 CONTINUE entry_key_loop WHEN smallest_ed = 0 AND c_symspell_suggestion_verbosity = 1; -- exact match early jump to the next key
647 END IF; -- maxED test
648 END IF; -- suggestion not seen test
649 END LOOP; -- loop over suggestions
650 END LOOP; -- loop over entries
651 END LOOP; -- loop over entry_keys
653 -- Now we're done examining this word
654 IF c_symspell_suggestion_verbosity = 0 THEN
655 -- Return the "best" suggestion from the smallest edit
656 -- distance group. We define best based on the weighting
657 -- of the non-lev similarity measures and use the suggestion
658 -- use count to break ties.
660 SELECT * FROM UNNEST(output_list)
661 ORDER BY lev_distance,
662 (soundex_sim * c_soundex_weight)
663 + (pg_trgm_sim * c_pg_trgm_weight)
664 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
665 suggestion_count DESC
667 ELSIF c_symspell_suggestion_verbosity = 1 THEN
668 -- Return all suggestions from the smallest
669 -- edit distance group.
671 SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
672 ORDER BY (soundex_sim * c_soundex_weight)
673 + (pg_trgm_sim * c_pg_trgm_weight)
674 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
675 suggestion_count DESC;
676 ELSIF c_symspell_suggestion_verbosity = 2 THEN
677 -- Return everything we find, along with relevant stats
679 SELECT * FROM UNNEST(output_list)
680 ORDER BY lev_distance,
681 (soundex_sim * c_soundex_weight)
682 + (pg_trgm_sim * c_pg_trgm_weight)
683 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
684 suggestion_count DESC;
685 ELSIF c_symspell_suggestion_verbosity = 3 THEN
686 -- Return everything we find from the two smallest edit distance groups
688 SELECT * FROM UNNEST(output_list)
689 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
690 ORDER BY lev_distance,
691 (soundex_sim * c_soundex_weight)
692 + (pg_trgm_sim * c_pg_trgm_weight)
693 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
694 suggestion_count DESC;
695 ELSIF c_symspell_suggestion_verbosity = 4 THEN
696 -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
698 SELECT * FROM UNNEST(output_list)
699 WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
700 ORDER BY lev_distance,
701 (soundex_sim * c_soundex_weight)
702 + (pg_trgm_sim * c_pg_trgm_weight)
703 + (qwerty_kb_match * c_keyboard_distance_weight) DESC,
704 suggestion_count DESC;
706 END LOOP; -- loop over words
708 $F$ LANGUAGE PLPGSQL;
712 -- Find the "broadest" value in use, and update the defaults for all classes
717 SELECT FIRST(s.value ORDER BY t.depth) INTO val
718 FROM actor.org_unit_setting s
719 JOIN actor.org_unit u ON (u.id = s.org_unit)
720 JOIN actor.org_unit_type t ON (u.ou_type = t.id)
721 WHERE s.name = 'opac.did_you_mean.low_result_threshold';
723 IF FOUND AND val IS NOT NULL THEN
724 UPDATE config.metabib_class SET low_result_threshold = val::INT;
727 SELECT FIRST(s.value ORDER BY t.depth) INTO val
728 FROM actor.org_unit_setting s
729 JOIN actor.org_unit u ON (u.id = s.org_unit)
730 JOIN actor.org_unit_type t ON (u.ou_type = t.id)
731 WHERE s.name = 'opac.did_you_mean.max_suggestions';
733 IF FOUND AND val IS NOT NULL THEN
734 UPDATE config.metabib_class SET max_suggestions = val::INT;
737 SELECT FIRST(s.value ORDER BY t.depth) INTO val
738 FROM actor.org_unit_setting s
739 JOIN actor.org_unit u ON (u.id = s.org_unit)
740 JOIN actor.org_unit_type t ON (u.ou_type = t.id)
741 WHERE s.name = 'search.symspell.min_suggestion_use_threshold';
743 IF FOUND AND val IS NOT NULL THEN
744 UPDATE config.metabib_class SET min_suggestion_use_threshold = val::INT;
747 SELECT FIRST(s.value ORDER BY t.depth) INTO val
748 FROM actor.org_unit_setting s
749 JOIN actor.org_unit u ON (u.id = s.org_unit)
750 JOIN actor.org_unit_type t ON (u.ou_type = t.id)
751 WHERE s.name = 'search.symspell.soundex.weight';
753 IF FOUND AND val IS NOT NULL THEN
754 UPDATE config.metabib_class SET soundex_weight = val::INT;
757 SELECT FIRST(s.value ORDER BY t.depth) INTO val
758 FROM actor.org_unit_setting s
759 JOIN actor.org_unit u ON (u.id = s.org_unit)
760 JOIN actor.org_unit_type t ON (u.ou_type = t.id)
761 WHERE s.name = 'search.symspell.pg_trgm.weight';
763 IF FOUND AND val IS NOT NULL THEN
764 UPDATE config.metabib_class SET pg_trgm_weight = val::INT;
767 SELECT FIRST(s.value ORDER BY t.depth) INTO val
768 FROM actor.org_unit_setting s
769 JOIN actor.org_unit u ON (u.id = s.org_unit)
770 JOIN actor.org_unit_type t ON (u.ou_type = t.id)
771 WHERE s.name = 'search.symspell.keyboard_distance.weight';
773 IF FOUND AND val IS NOT NULL THEN
774 UPDATE config.metabib_class SET keyboard_distance_weight = val::INT;