3 -- XXXX.schema.vandelay.import-match-no-like-any.sql
5 SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
7 CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
8 match_set_id INTEGER, record_xml TEXT
9 ) RETURNS SETOF vandelay.match_set_test_result AS $$
20 tags_rstore := vandelay.flatten_marc_hstore(record_xml);
21 svf_rstore := vandelay.extract_rec_attrs(record_xml);
23 CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
24 CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
26 -- generate the where clause and return that directly (into wq), and as
27 -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
28 wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
30 query_ := 'SELECT DISTINCT(record), ';
32 -- qrows table is for the quality bits we add to the SELECT clause
33 SELECT ARRAY_TO_STRING(
34 ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
35 ) INTO coal FROM _vandelay_tmp_qrows;
37 -- our query string so far is the SELECT clause and the inital FROM.
38 -- no JOINs yet nor the WHERE clause
39 query_ := query_ || coal || ' AS quality ' || E'\n';
41 -- jrows table is for the joins we must make (and the real text conditions)
42 SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
43 FROM _vandelay_tmp_jrows;
45 -- add those joins and the where clause to our query.
46 query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
48 -- this will return rows of record,quality
49 FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
53 DROP TABLE _vandelay_tmp_qrows;
54 DROP TABLE _vandelay_tmp_jrows;
60 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
65 root vandelay.match_set_point;
67 SELECT * INTO root FROM vandelay.match_set_point
68 WHERE parent IS NULL AND match_set = match_set_id;
70 RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
74 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
75 node vandelay.match_set_point,
83 child vandelay.match_set_point;
85 SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
86 WHERE parent = node.id;
88 IF ARRAY_LENGTH(children, 1) > 0 THEN
89 this_op := vandelay._get_expr_render_one(node);
92 WHILE children[i] IS NOT NULL LOOP
93 SELECT * INTO child FROM vandelay.match_set_point
94 WHERE id = children[i];
96 q := q || ' ' || this_op || ' ';
99 q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
103 ELSIF node.bool_op IS NULL THEN
104 PERFORM vandelay._get_expr_push_qrow(node);
105 PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
106 RETURN vandelay._get_expr_render_one(node);
113 CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
114 node vandelay.match_set_point,
127 -- remember $1 is tags_rstore, and $2 is svf_rstore
130 SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
131 IF jrow_count > 0 THEN
132 my_using := ' USING (record)';
133 my_join := 'FULL OUTER JOIN';
139 IF node.tag IS NOT NULL THEN
140 caseless := (node.tag IN ('020', '022', '024'));
142 IF node.subfield IS NOT NULL THEN
143 tagkey := tagkey || node.subfield;
161 my_alias := 'n' || node.id::TEXT;
163 jrow := my_join || ' (SELECT *, ';
164 IF node.tag IS NOT NULL THEN
165 jrow := jrow || node.quality ||
166 ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
168 IF node.subfield IS NOT NULL THEN
169 jrow := jrow || ' AND mfr.subfield = ''' ||
170 node.subfield || '''';
172 jrow := jrow || ' AND (';
173 jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
174 jrow := jrow || ')) ' || my_alias || my_using || E'\n';
176 jrow := jrow || 'id AS record, ' || node.quality ||
177 ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
178 node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
179 my_alias || my_using || E'\n';
181 INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
185 CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
197 vals := tags_rstore->tagkey;
202 IF vals[i] IS NULL THEN
205 result := result || ' OR ';
210 result := result || 'LOWER(mfr.value) ' || op;
212 result := result || 'mfr.value ' || op;
215 result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
217 IF vals[i] IS NULL THEN
228 -- drop old versions of these functions with fewer args
229 DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
230 DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
231 DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
233 -- This next index might fully supplant an existing one but leaving both for now
234 -- (they are not too large)
235 -- The reason we need this index is to ensure that the query parser always
236 -- prefers this index over the simpler tag/subfield index, as this greatly
237 -- increases Vandelay overlay speed for these identifiers, especially when
238 -- a record has many of these fields (around > 4-6 seems like the cutoff
239 -- on at least one PG9.1 system)
240 -- A similar index could be added for other fields (e.g. 010), but one should
241 -- leave out the LOWER() in all other cases.
242 -- TODO: verify whether we can discard the non tag/subfield/substring version
243 -- (metabib_full_rec_isxn_caseless_idx)
244 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
245 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
246 WHERE tag IN ('020', '022', '024');