owner INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
name TEXT NOT NULL,
complete BOOL NOT NULL DEFAULT FALSE,
- queue_type TEXT NOT NULL DEFAULT 'bib' CHECK (queue_type IN ('bib','authority')),
- match_set INT REFERENCES vandelay.match_set (id) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED,
- CONSTRAINT vand_queue_name_once_per_owner_const UNIQUE (owner,name,queue_type)
+ match_set INT REFERENCES vandelay.match_set (id) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED
);
CREATE TABLE vandelay.queued_record (
pub_note TEXT,
priv_note_title TEXT,
priv_note TEXT,
+ internal_id TEXT,
CONSTRAINT vand_import_item_attr_def_idx UNIQUE (owner,name)
);
description TEXT NOT NULL -- i18n
);
+CREATE TYPE vandelay.bib_queue_queue_type AS ENUM ('bib', 'acq');
+
CREATE TABLE vandelay.bib_queue (
- queue_type TEXT NOT NULL DEFAULT 'bib' CHECK (queue_type = 'bib'),
+ queue_type vandelay.bib_queue_queue_type NOT NULL DEFAULT 'bib',
item_attr_def BIGINT REFERENCES vandelay.import_item_attr_definition (id) ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED,
CONSTRAINT vand_bib_queue_name_once_per_owner_const UNIQUE (owner,name,queue_type)
) INHERITS (vandelay.queue);
alert_message TEXT,
pub_note TEXT,
priv_note TEXT,
- opac_visible BOOL
+ opac_visible BOOL,
+ internal_id BIGINT -- queue_type == 'acq' ? acq.lineitem_detail.id : asset.copy.id
);
CREATE TABLE vandelay.import_bib_trash_fields (
-- generate the where clause and return that directly (into wq), and as
-- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
- wq := vandelay.get_expr_from_match_set(match_set_id);
+ wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
- query_ := 'SELECT bre.id AS record, ';
+ query_ := 'SELECT DISTINCT(record), ';
-- qrows table is for the quality bits we add to the SELECT clause
SELECT ARRAY_TO_STRING(
-- our query string so far is the SELECT clause and the inital FROM.
-- no JOINs yet nor the WHERE clause
- query_ := query_ || coal || ' AS quality ' || E'\n' ||
- 'FROM biblio.record_entry bre ';
+ query_ := query_ || coal || ' AS quality ' || E'\n';
-- jrows table is for the joins we must make (and the real text conditions)
SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
FROM _vandelay_tmp_jrows;
-- add those joins and the where clause to our query.
- query_ := query_ || joins || E'\n' || 'WHERE ' || wq || ' AND not bre.deleted';
+ query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
-- this will return rows of record,quality
FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
CREATE OR REPLACE FUNCTION vandelay.flatten_marc_hstore(
record_xml TEXT
-) RETURNS HSTORE AS $$
+) RETURNS HSTORE AS $func$
BEGIN
RETURN (SELECT
HSTORE(
ARRAY_ACCUM(value)
)
FROM (
- SELECT tag, subfield, ARRAY_ACCUM(value)::TEXT AS value
- FROM vandelay.flatten_marc(record_xml)
+ SELECT tag, subfield, ARRAY_ACCUM(value)::TEXT AS value
+ FROM (SELECT tag,
+ subfield,
+ CASE WHEN tag = '020' THEN -- caseless -- isbn
+ LOWER((REGEXP_MATCHES(value,$$^(\S{10,17})$$))[1] || '%')
+ WHEN tag = '022' THEN -- caseless -- issn
+ LOWER((REGEXP_MATCHES(value,$$^(\S{4}[- ]?\S{4})$$))[1] || '%')
+ WHEN tag = '024' THEN -- caseless -- upc (other)
+ LOWER(value || '%')
+ ELSE
+ value
+ END AS value
+ FROM vandelay.flatten_marc(record_xml)) x
GROUP BY tag, subfield ORDER BY tag, subfield
) subquery
);
END;
-$$ LANGUAGE PLPGSQL;
+$func$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
- match_set_id INTEGER
+ match_set_id INTEGER,
+ tags_rstore HSTORE
) RETURNS TEXT AS $$
DECLARE
root vandelay.match_set_point;
SELECT * INTO root FROM vandelay.match_set_point
WHERE parent IS NULL AND match_set = match_set_id;
- RETURN vandelay.get_expr_from_match_set_point(root);
+ RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
- node vandelay.match_set_point
+ node vandelay.match_set_point,
+ tags_rstore HSTORE
) RETURNS TEXT AS $$
DECLARE
q TEXT;
q := q || ' ' || this_op || ' ';
END IF;
i := i + 1;
- q := q || vandelay.get_expr_from_match_set_point(child);
+ q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
END LOOP;
q := q || ')';
RETURN q;
ELSIF node.bool_op IS NULL THEN
PERFORM vandelay._get_expr_push_qrow(node);
- PERFORM vandelay._get_expr_push_jrow(node);
+ PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
RETURN vandelay._get_expr_render_one(node);
ELSE
RETURN '';
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
- node vandelay.match_set_point
+ node vandelay.match_set_point,
+ tags_rstore HSTORE
) RETURNS VOID AS $$
DECLARE
jrow TEXT;
my_alias TEXT;
op TEXT;
tagkey TEXT;
+ caseless BOOL;
+ jrow_count INT;
+ my_using TEXT;
+ my_join TEXT;
BEGIN
- IF node.negate THEN
- op := '<>';
+ -- remember $1 is tags_rstore, and $2 is svf_rstore
+
+ caseless := FALSE;
+ SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
+ IF jrow_count > 0 THEN
+ my_using := ' USING (record)';
+ my_join := 'FULL OUTER JOIN';
ELSE
- op := '=';
+ my_using := '';
+ my_join := 'FROM';
END IF;
IF node.tag IS NOT NULL THEN
+ caseless := (node.tag IN ('020', '022', '024'));
tagkey := node.tag;
IF node.subfield IS NOT NULL THEN
tagkey := tagkey || node.subfield;
END IF;
END IF;
+ IF node.negate THEN
+ IF caseless THEN
+ op := 'NOT LIKE';
+ ELSE
+ op := '<>';
+ END IF;
+ ELSE
+ IF caseless THEN
+ op := 'LIKE';
+ ELSE
+ op := '=';
+ END IF;
+ END IF;
+
my_alias := 'n' || node.id::TEXT;
- jrow := 'LEFT JOIN (SELECT *, ' || node.quality ||
- ' AS quality FROM metabib.';
+ jrow := my_join || ' (SELECT *, ';
IF node.tag IS NOT NULL THEN
- jrow := jrow || 'full_rec) ' || my_alias || ' ON (' ||
- my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' ||
+ jrow := jrow || node.quality ||
+ ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
node.tag || '''';
IF node.subfield IS NOT NULL THEN
- jrow := jrow || ' AND ' || my_alias || '.subfield = ''' ||
+ jrow := jrow || ' AND mfr.subfield = ''' ||
node.subfield || '''';
END IF;
- jrow := jrow || ' AND (' || my_alias || '.value ' || op ||
- ' ANY(($1->''' || tagkey || ''')::TEXT[])))';
+ jrow := jrow || ' AND (';
+ jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
+ jrow := jrow || ')) ' || my_alias || my_using || E'\n';
ELSE -- svf
- jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
- my_alias || '.id = bre.id AND (' ||
- my_alias || '.attrs->''' || node.svf ||
- ''' ' || op || ' $2->''' || node.svf || '''))';
+ jrow := jrow || 'id AS record, ' || node.quality ||
+ ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
+ node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
+ my_alias || my_using || E'\n';
END IF;
INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
END;
$$ LANGUAGE PLPGSQL;
+CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
+ caseless BOOLEAN,
+ op TEXT,
+ tags_rstore HSTORE,
+ tagkey TEXT
+) RETURNS TEXT AS $$
+DECLARE
+ result TEXT;
+ i INT;
+ vals TEXT[];
+BEGIN
+ i := 1;
+ vals := tags_rstore->tagkey;
+ result := '';
+
+ WHILE TRUE LOOP
+ IF i > 1 THEN
+ IF vals[i] IS NULL THEN
+ EXIT;
+ ELSE
+ result := result || ' OR ';
+ END IF;
+ END IF;
+
+ IF caseless THEN
+ result := result || 'LOWER(mfr.value) ' || op;
+ ELSE
+ result := result || 'mfr.value ' || op;
+ END IF;
+
+ result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
+
+ IF vals[i] IS NULL THEN
+ EXIT;
+ END IF;
+ i := i + 1;
+ END LOOP;
+
+ RETURN result;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
CREATE OR REPLACE FUNCTION vandelay._get_expr_render_one(
node vandelay.match_set_point
) RETURNS TEXT AS $$
if (exists($fields{$f}{match})) {
next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf}));
}
- my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}{sf}};
+ my @new_sf = map { ($_ => $from_field->subfield($_)) } grep { defined($from_field->subfield($_)) } @{$fields{$f}{sf}};
$to_field->add_subfields( @new_sf );
}
}
BEGIN
parsed_target := vandelay.strip_field( target_xml, ''); -- this dance normalizes the format of the xml for the IF below
+ xml_output := parsed_target; -- if there are no replace rules, just return the input
FOR curr_field IN SELECT UNNEST( STRING_TO_ARRAY(field, ',') ) LOOP -- naive split, but it's the same we use in the perl
END;
$$ LANGUAGE PLPGSQL;
-CREATE OR REPLACE FUNCTION vandelay.match_bib_record ( ) RETURNS TRIGGER AS $func$
-DECLARE
- attr RECORD;
- attr_def RECORD;
- eg_rec RECORD;
- id_value TEXT;
- exact_id BIGINT;
-BEGIN
-
- DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id;
-
- SELECT * INTO attr_def FROM vandelay.bib_attr_definition WHERE xpath = '//*[@tag="901"]/*[@code="c"]' ORDER BY id LIMIT 1;
-
- IF attr_def IS NOT NULL AND attr_def.id IS NOT NULL THEN
- id_value := extract_marc_field('vandelay.queued_bib_record', NEW.id, attr_def.xpath, attr_def.remove);
-
- IF id_value IS NOT NULL AND id_value <> '' AND id_value ~ $r$^\d+$$r$ THEN
- SELECT id INTO exact_id FROM biblio.record_entry WHERE id = id_value::BIGINT AND NOT deleted;
- SELECT * INTO attr FROM vandelay.queued_bib_record_attr WHERE record = NEW.id and field = attr_def.id LIMIT 1;
- IF exact_id IS NOT NULL THEN
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('id', attr.id, NEW.id, exact_id);
- END IF;
- END IF;
- END IF;
-
- IF exact_id IS NULL THEN
- FOR attr IN SELECT a.* FROM vandelay.queued_bib_record_attr a JOIN vandelay.bib_attr_definition d ON (d.id = a.field) WHERE record = NEW.id AND d.ident IS TRUE LOOP
-
- -- All numbers? check for an id match
- IF (attr.attr_value ~ $r$^\d+$$r$) THEN
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE id = attr.attr_value::BIGINT AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('id', attr.id, NEW.id, eg_rec.id);
- END LOOP;
- END IF;
-
- -- Looks like an ISBN? check for an isbn match
- IF (attr.attr_value ~* $r$^[0-9x]+$$r$ AND character_length(attr.attr_value) IN (10,13)) THEN
- FOR eg_rec IN EXECUTE $$SELECT * FROM metabib.full_rec fr WHERE fr.value LIKE evergreen.lowercase('$$ || attr.attr_value || $$%') AND fr.tag = '020' AND fr.subfield = 'a'$$ LOOP
- PERFORM id FROM biblio.record_entry WHERE id = eg_rec.record AND deleted IS FALSE;
- IF FOUND THEN
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('isbn', attr.id, NEW.id, eg_rec.record);
- END IF;
- END LOOP;
-
- -- subcheck for isbn-as-tcn
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE tcn_value = 'i' || attr.attr_value AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('tcn_value', attr.id, NEW.id, eg_rec.id);
- END LOOP;
- END IF;
-
- -- check for an OCLC tcn_value match
- IF (attr.attr_value ~ $r$^o\d+$$r$) THEN
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE tcn_value = regexp_replace(attr.attr_value,'^o','ocm') AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('tcn_value', attr.id, NEW.id, eg_rec.id);
- END LOOP;
- END IF;
-
- -- check for a direct tcn_value match
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE tcn_value = attr.attr_value AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('tcn_value', attr.id, NEW.id, eg_rec.id);
- END LOOP;
-
- -- check for a direct item barcode match
- FOR eg_rec IN
- SELECT DISTINCT b.*
- FROM biblio.record_entry b
- JOIN asset.call_number cn ON (cn.record = b.id)
- JOIN asset.copy cp ON (cp.call_number = cn.id)
- WHERE cp.barcode = attr.attr_value AND cp.deleted IS FALSE
- LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('id', attr.id, NEW.id, eg_rec.id);
- END LOOP;
-
- END LOOP;
- END IF;
-
- RETURN NULL;
-END;
-$func$ LANGUAGE PLPGSQL;
-
CREATE OR REPLACE FUNCTION vandelay.cleanup_bib_marc ( ) RETURNS TRIGGER AS $$
BEGIN
IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
remove TEXT NOT NULL DEFAULT ''
);
+CREATE TYPE vandelay.authority_queue_queue_type AS ENUM ('authority');
CREATE TABLE vandelay.authority_queue (
- queue_type TEXT NOT NULL DEFAULT 'authority' CHECK (queue_type = 'authority'),
+ queue_type vandelay.authority_queue_queue_type NOT NULL DEFAULT 'authority',
CONSTRAINT vand_authority_queue_name_once_per_owner_const UNIQUE (owner,name,queue_type)
) INHERITS (vandelay.queue);
ALTER TABLE vandelay.authority_queue ADD PRIMARY KEY (id);