value TEXT unique,
index_vector tsvector
);
-CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIST (index_vector);
+CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
CREATE TRIGGER metabib_browse_entry_fti_trigger
BEFORE INSERT OR UPDATE ON metabib.browse_entry
FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
def INT REFERENCES config.metabib_field (id),
source BIGINT REFERENCES biblio.record_entry (id)
);
+CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
+CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
+CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
+
CREATE OR REPLACE FUNCTION metabib.facet_normalize_trigger () RETURNS TRIGGER AS $$
END IF;
END IF;
END;
-$func$ LANGUAGE PLPGSQL;
+$func$ LANGUAGE PLPGSQL ROWS 1;
-- Given a string such as a user might type into a search box, prepare
IF visibility_org IS NOT NULL THEN
opac_visibility_join := '
JOIN asset.opac_visible_copies aovc ON (
- aovc.record = mbedm.source AND
+ aovc.record = x.source AND
aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
)';
ELSE
';
END IF;
- RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
- mbe.value,
- cmf.id,
- cmc.buoyant AND _registered.field_class IS NOT NULL,
- _registered.field = cmf.id,
- cmf.weight,
- TS_RANK_CD(mbe.index_vector, $1, $6),
- cmc.buoyant
- FROM metabib.browse_entry_def_map mbedm
- JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
- JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
- JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
- ' || search_class_join || opac_visibility_join ||
- ' WHERE $1 @@ mbe.index_vector
- ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
- LIMIT $5) x
- ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
- ' -- sic, repeat the order by clause in the outer select too
+ RETURN QUERY EXECUTE '
+SELECT DISTINCT
+ x.value,
+ x.id,
+ x.push,
+ x.restrict,
+ x.weight,
+ x.ts_rank_cd,
+ x.buoyant,
+ TS_HEADLINE(value, $7, $3)
+ FROM (SELECT DISTINCT
+ mbe.value,
+ cmf.id,
+ cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
+ _registered.field = cmf.id AS restrict,
+ cmf.weight,
+ TS_RANK_CD(mbe.index_vector, $1, $6),
+ cmc.buoyant,
+ mbedm.source
+ FROM metabib.browse_entry_def_map mbedm
+ JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
+ JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
+ JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
+ ' || search_class_join || '
+ ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+ LIMIT 1000) AS x
+ ' || opac_visibility_join || '
+ ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+ LIMIT $5
+' -- sic, repeat the order by clause in the outer select too
USING
query, search_class, headline_opts,
visibility_org, query_limit, normalization, plain_query
INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
(16, 'subject', 'complete', oils_i18n_gettext(16, 'All Subjects', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:subject$$, FALSE );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (17, 'identifier', 'accession', oils_i18n_gettext(17, 'Accession Number', 'cmf', 'label'), 'marcxml', $$//marc:controlfield[@tag='001']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (18, 'identifier', 'isbn', oils_i18n_gettext(18, 'ISBN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='020']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (19, 'identifier', 'issn', oils_i18n_gettext(19, 'ISSN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='022']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (20, 'identifier', 'upc', oils_i18n_gettext(20, 'UPC', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='1']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (21, 'identifier', 'ismn', oils_i18n_gettext(21, 'ISMN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='2']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (22, 'identifier', 'ean', oils_i18n_gettext(22, 'EAN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='3']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (23, 'identifier', 'isrc', oils_i18n_gettext(23, 'ISRC', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='0']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (24, 'identifier', 'sici', oils_i18n_gettext(24, 'SICI', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='4']/marc:subfield[@code='a' or @code='z']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (25, 'identifier', 'bibcn', oils_i18n_gettext(25, 'Local Free-Text Call Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='099']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (26, 'identifier', 'tcn', oils_i18n_gettext(26, 'Title Control Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='901']/marc:subfield[@code='a']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath ) VALUES
- (27, 'identifier', 'bibid', oils_i18n_gettext(27, 'Internal ID', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='901']/marc:subfield[@code='c']$$ );
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, search_field, facet_field) VALUES
- (28, 'identifier', 'authority_id', oils_i18n_gettext(28, 'Authority Record ID', 'cmf', 'label'), 'marcxml', '//marc:datafield/marc:subfield[@code="0"]', FALSE, TRUE);
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath) VALUES
- (29, 'identifier', 'scn', oils_i18n_gettext(29, 'System Control Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='035']/marc:subfield[@code="a"]$$);
-INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath) VALUES
- (30, 'identifier', 'lccn', oils_i18n_gettext(30, 'LC Control Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='010']/marc:subfield[@code="a" or @code='z']$$);
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (17, 'identifier', 'accession', oils_i18n_gettext(17, 'Accession Number', 'cmf', 'label'), 'marcxml', $$//marc:controlfield[@tag='001']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (18, 'identifier', 'isbn', oils_i18n_gettext(18, 'ISBN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='020']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (19, 'identifier', 'issn', oils_i18n_gettext(19, 'ISSN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='022']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (20, 'identifier', 'upc', oils_i18n_gettext(20, 'UPC', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='1']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (21, 'identifier', 'ismn', oils_i18n_gettext(21, 'ISMN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='2']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (22, 'identifier', 'ean', oils_i18n_gettext(22, 'EAN', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='3']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (23, 'identifier', 'isrc', oils_i18n_gettext(23, 'ISRC', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='0']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (24, 'identifier', 'sici', oils_i18n_gettext(24, 'SICI', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='024' and @ind1='4']/marc:subfield[@code='a' or @code='z']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (25, 'identifier', 'bibcn', oils_i18n_gettext(25, 'Local Free-Text Call Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='099']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (26, 'identifier', 'tcn', oils_i18n_gettext(26, 'Title Control Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='901']/marc:subfield[@code='a']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field ) VALUES
+ (27, 'identifier', 'bibid', oils_i18n_gettext(27, 'Internal ID', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='901']/marc:subfield[@code='c']$$, FALSE );
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, search_field, facet_field, browse_field) VALUES
+ (28, 'identifier', 'authority_id', oils_i18n_gettext(28, 'Authority Record ID', 'cmf', 'label'), 'marcxml', '//marc:datafield/marc:subfield[@code="0"]', FALSE, TRUE, FALSE);
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field) VALUES
+ (29, 'identifier', 'scn', oils_i18n_gettext(29, 'System Control Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='035']/marc:subfield[@code="a"]$$, FALSE);
+INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, browse_field) VALUES
+ (30, 'identifier', 'lccn', oils_i18n_gettext(30, 'LC Control Number', 'cmf', 'label'), 'marcxml', $$//marc:datafield[@tag='010']/marc:subfield[@code="a" or @code='z']$$, FALSE);
SELECT SETVAL('config.metabib_field_id_seq'::TEXT, (SELECT MAX(id) FROM config.metabib_field), TRUE);
--- /dev/null
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('0680', :eg_version);
+
+-- Not much use in having identifier-class fields be suggestions. Credit for the idea goes to Ben Shum.
+UPDATE config.metabib_field SET browse_field = FALSE WHERE id < 100 AND field_class = 'identifier';
+
+
+---------------------------------------------------------------------------
+-- The rest of this was tested on Evergreen Indiana's dev server, which has
+-- a large data set of 2.6M bibs, and was instrumental in sussing out the
+-- needed adjustments. Thanks, EG-IN!
+---------------------------------------------------------------------------
+
+-- GIN indexes are /much/ better for prefix matching, which is important for browse and autosuggest
+DROP INDEX metabib.metabib_browse_entry_index_vector_idx;
+CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector);
+
+
+-- We need thes to make the autosuggest limiting joins fast
+CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def);
+CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry);
+CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source);
+
+-- In practice this will always be ~1 row, and the default of 1000 causes terrible plans
+ALTER FUNCTION metabib.search_class_to_registered_components(text) ROWS 1;
+
+-- Reworking of the generated query to act in a sane manner in the face of large datasets
+CREATE OR REPLACE
+ FUNCTION metabib.suggest_browse_entries(
+ raw_query_text TEXT, -- actually typed by humans at the UI level
+ search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
+ headline_opts TEXT, -- markup options for ts_headline()
+ visibility_org INTEGER,-- null if you don't want opac visibility test
+ query_limit INTEGER,-- use in LIMIT clause of interal query
+ normalization INTEGER -- argument to TS_RANK_CD()
+ ) RETURNS TABLE (
+ value TEXT, -- plain
+ field INTEGER,
+ buoyant_and_class_match BOOL,
+ field_match BOOL,
+ field_weight INTEGER,
+ rank REAL,
+ buoyant BOOL,
+ match TEXT -- marked up
+ ) AS $func$
+DECLARE
+ prepared_query_texts TEXT[];
+ query TSQUERY;
+ plain_query TSQUERY;
+ opac_visibility_join TEXT;
+ search_class_join TEXT;
+ r_fields RECORD;
+BEGIN
+ prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
+
+ query := TO_TSQUERY('keyword', prepared_query_texts[1]);
+ plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
+
+ IF visibility_org IS NOT NULL THEN
+ opac_visibility_join := '
+ JOIN asset.opac_visible_copies aovc ON (
+ aovc.record = x.source AND
+ aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
+ )';
+ ELSE
+ opac_visibility_join := '';
+ END IF;
+
+ -- The following determines whether we only provide suggestsons matching
+ -- the user's selected search_class, or whether we show other suggestions
+ -- too. The reason for MIN() is that for search_classes like
+ -- 'title|proper|uniform' you would otherwise get multiple rows. The
+ -- implication is that if title as a class doesn't have restrict,
+ -- nor does the proper field, but the uniform field does, you're going
+ -- to get 'false' for your overall evaluation of 'should we restrict?'
+ -- To invert that, change from MIN() to MAX().
+
+ SELECT
+ INTO r_fields
+ MIN(cmc.restrict::INT) AS restrict_class,
+ MIN(cmf.restrict::INT) AS restrict_field
+ FROM metabib.search_class_to_registered_components(search_class)
+ AS _registered (field_class TEXT, field INT)
+ JOIN
+ config.metabib_class cmc ON (cmc.name = _registered.field_class)
+ LEFT JOIN
+ config.metabib_field cmf ON (cmf.id = _registered.field);
+
+ -- evaluate 'should we restrict?'
+ IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
+ search_class_join := '
+ JOIN
+ metabib.search_class_to_registered_components($2)
+ AS _registered (field_class TEXT, field INT) ON (
+ (_registered.field IS NULL AND
+ _registered.field_class = cmf.field_class) OR
+ (_registered.field = cmf.id)
+ )
+ ';
+ ELSE
+ search_class_join := '
+ LEFT JOIN
+ metabib.search_class_to_registered_components($2)
+ AS _registered (field_class TEXT, field INT) ON (
+ _registered.field_class = cmc.name
+ )
+ ';
+ END IF;
+
+ RETURN QUERY EXECUTE '
+SELECT DISTINCT
+ x.value,
+ x.id,
+ x.push,
+ x.restrict,
+ x.weight,
+ x.ts_rank_cd,
+ x.buoyant,
+ TS_HEADLINE(value, $7, $3)
+ FROM (SELECT DISTINCT
+ mbe.value,
+ cmf.id,
+ cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
+ _registered.field = cmf.id AS restrict,
+ cmf.weight,
+ TS_RANK_CD(mbe.index_vector, $1, $6),
+ cmc.buoyant,
+ mbedm.source
+ FROM metabib.browse_entry_def_map mbedm
+
+ -- Start with a pre-limited set of 10k possible suggestions. More than that is not going to be useful anyway
+ JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
+
+ JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
+ JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
+ ' || search_class_join || '
+ ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+ LIMIT 1000) AS x -- This outer limit makes testing for opac visibility usably fast
+ ' || opac_visibility_join || '
+ ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
+ LIMIT $5
+' -- sic, repeat the order by clause in the outer select too
+ USING
+ query, search_class, headline_opts,
+ visibility_org, query_limit, normalization, plain_query
+ ;
+
+ -- sort order:
+ -- buoyant AND chosen class = match class
+ -- chosen field = match field
+ -- field weight
+ -- rank
+ -- buoyancy
+ -- value itself
+
+END;
+$func$ LANGUAGE PLPGSQL;
+
+COMMIT;