-- Table to define match points, either FF via SVF or tag+subfield
CREATE TABLE vandelay.match_set_point (
id SERIAL PRIMARY KEY,
- match_set INT REFERENCES vandelay.match_set (id),
+ match_set INT REFERENCES vandelay.match_set (id) ON DELETE CASCADE,
parent INT REFERENCES vandelay.match_set_point (id),
bool_op TEXT CHECK (bool_op IS NULL OR (bool_op IN ('AND','OR','NOT'))),
svf TEXT REFERENCES config.record_attr_definition (name),
CREATE TABLE vandelay.match_set_quality (
id SERIAL PRIMARY KEY,
- match_set INT NOT NULL REFERENCES vandelay.match_set (id),
+ match_set INT NOT NULL REFERENCES vandelay.match_set (id) ON DELETE CASCADE,
svf TEXT REFERENCES config.record_attr_definition,
tag TEXT,
subfield TEXT,
CONSTRAINT vmsq_need_a_subfield_with_a_tag CHECK ((tag IS NOT NULL AND subfield IS NOT NULL) OR tag IS NULL),
CONSTRAINT vmsq_need_a_tag_or_a_ff CHECK ((tag IS NOT NULL AND svf IS NULL) OR (tag IS NULL AND svf IS NOT NULL))
);
-CREATE UNIQUE INDEX vmsq_def_once_per_set ON vandelay.match_set_quality (match_set, COALESCE(tag,''), COALESCE(subfield,''), COALESCE(svf,''));
+CREATE UNIQUE INDEX vmsq_def_once_per_set ON vandelay.match_set_quality (match_set, COALESCE(tag,''), COALESCE(subfield,''), COALESCE(svf,''), value);
CREATE TABLE vandelay.queue (
owner INT NOT NULL REFERENCES actor.usr (id) DEFERRABLE INITIALLY DEFERRED,
name TEXT NOT NULL,
complete BOOL NOT NULL DEFAULT FALSE,
- queue_type TEXT NOT NULL DEFAULT 'bib' CHECK (queue_type IN ('bib','authority')),
- match_set INT REFERENCES vandelay.match_set (id) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED,
- CONSTRAINT vand_queue_name_once_per_owner_const UNIQUE (owner,name,queue_type)
+ match_set INT REFERENCES vandelay.match_set (id) ON UPDATE CASCADE ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED
);
CREATE TABLE vandelay.queued_record (
pub_note TEXT,
priv_note_title TEXT,
priv_note TEXT,
+ internal_id TEXT,
CONSTRAINT vand_import_item_attr_def_idx UNIQUE (owner,name)
);
description TEXT NOT NULL -- i18n
);
+CREATE TYPE vandelay.bib_queue_queue_type AS ENUM ('bib', 'acq');
+
CREATE TABLE vandelay.bib_queue (
- queue_type TEXT NOT NULL DEFAULT 'bib' CHECK (queue_type = 'bib'),
+ queue_type vandelay.bib_queue_queue_type NOT NULL DEFAULT 'bib',
item_attr_def BIGINT REFERENCES vandelay.import_item_attr_definition (id) ON DELETE SET NULL DEFERRABLE INITIALLY DEFERRED,
CONSTRAINT vand_bib_queue_name_once_per_owner_const UNIQUE (owner,name,queue_type)
) INHERITS (vandelay.queue);
CREATE TABLE vandelay.bib_match (
id BIGSERIAL PRIMARY KEY,
- matched_set INT REFERENCES vandelay.match_set (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
queued_record BIGINT REFERENCES vandelay.queued_bib_record (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
eg_record BIGINT REFERENCES biblio.record_entry (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
- quality INT NOT NULL DEFAULT 0,
+ quality INT NOT NULL DEFAULT 1,
match_score INT NOT NULL DEFAULT 0
);
definition BIGINT NOT NULL REFERENCES vandelay.import_item_attr_definition (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
import_error TEXT REFERENCES vandelay.import_error (code) ON DELETE SET NULL ON UPDATE CASCADE DEFERRABLE INITIALLY DEFERRED,
error_detail TEXT,
+ imported_as BIGINT,
+ import_time TIMESTAMP WITH TIME ZONE,
owning_lib INT,
circ_lib INT,
call_number TEXT,
alert_message TEXT,
pub_note TEXT,
priv_note TEXT,
- opac_visible BOOL
+ opac_visible BOOL,
+ internal_id BIGINT -- queue_type == 'acq' ? acq.lineitem_detail.id : asset.copy.id
);
CREATE TABLE vandelay.import_bib_trash_fields (
attr_value := oils_xpath_string(attr_def.xpath, transformed_xml, COALESCE(attr_def.joiner,' '), ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]);
ELSIF attr_def.phys_char_sf IS NOT NULL THEN -- a named Physical Characteristic, see config.marc21_physical_characteristic_*_map
- SELECT value::TEXT INTO attr_value
- FROM vandelay.marc21_physical_characteristics(xml)
- WHERE subfield = attr_def.phys_char_sf
+ SELECT m.value::TEXT INTO attr_value
+ FROM vandelay.marc21_physical_characteristics(xml) v
+ JOIN config.marc21_physical_characteristic_value_map m ON (m.id = v.value)
+ WHERE v.subfield = attr_def.phys_char_sf
LIMIT 1; -- Just in case ...
END IF;
-- generate the where clause and return that directly (into wq), and as
-- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
- wq := vandelay.get_expr_from_match_set(match_set_id);
+ wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
- query_ := 'SELECT bre.id AS record, ';
+ query_ := 'SELECT DISTINCT(record), ';
-- qrows table is for the quality bits we add to the SELECT clause
SELECT ARRAY_TO_STRING(
-- our query string so far is the SELECT clause and the inital FROM.
-- no JOINs yet nor the WHERE clause
- query_ := query_ || coal || ' AS quality ' || E'\n' ||
- 'FROM biblio.record_entry bre ';
+ query_ := query_ || coal || ' AS quality ' || E'\n';
-- jrows table is for the joins we must make (and the real text conditions)
SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
FROM _vandelay_tmp_jrows;
-- add those joins and the where clause to our query.
- query_ := query_ || joins || E'\n' || 'WHERE ' || wq;
+ query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
-- this will return rows of record,quality
FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
CREATE OR REPLACE FUNCTION vandelay.flatten_marc_hstore(
record_xml TEXT
-) RETURNS HSTORE AS $$
+) RETURNS HSTORE AS $func$
BEGIN
RETURN (SELECT
HSTORE(
ARRAY_ACCUM(value)
)
FROM (
- SELECT tag, subfield, ARRAY_ACCUM(value)::TEXT AS value
- FROM vandelay.flatten_marc(record_xml)
+ SELECT tag, subfield, ARRAY_ACCUM(value)::TEXT AS value
+ FROM (SELECT tag,
+ subfield,
+ CASE WHEN tag = '020' THEN -- caseless -- isbn
+ LOWER((REGEXP_MATCHES(value,$$^(\S{10,17})$$))[1] || '%')
+ WHEN tag = '022' THEN -- caseless -- issn
+ LOWER((REGEXP_MATCHES(value,$$^(\S{4}[- ]?\S{4})$$))[1] || '%')
+ WHEN tag = '024' THEN -- caseless -- upc (other)
+ LOWER(value || '%')
+ ELSE
+ value
+ END AS value
+ FROM vandelay.flatten_marc(record_xml)) x
GROUP BY tag, subfield ORDER BY tag, subfield
) subquery
);
END;
-$$ LANGUAGE PLPGSQL;
+$func$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
- match_set_id INTEGER
+ match_set_id INTEGER,
+ tags_rstore HSTORE
) RETURNS TEXT AS $$
DECLARE
root vandelay.match_set_point;
SELECT * INTO root FROM vandelay.match_set_point
WHERE parent IS NULL AND match_set = match_set_id;
- RETURN vandelay.get_expr_from_match_set_point(root);
+ RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
END;
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
- node vandelay.match_set_point
+ node vandelay.match_set_point,
+ tags_rstore HSTORE
) RETURNS TEXT AS $$
DECLARE
q TEXT;
q := q || ' ' || this_op || ' ';
END IF;
i := i + 1;
- q := q || vandelay.get_expr_from_match_set_point(child);
+ q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
END LOOP;
q := q || ')';
RETURN q;
ELSIF node.bool_op IS NULL THEN
PERFORM vandelay._get_expr_push_qrow(node);
- PERFORM vandelay._get_expr_push_jrow(node);
+ PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
RETURN vandelay._get_expr_render_one(node);
ELSE
RETURN '';
$$ LANGUAGE PLPGSQL;
CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
- node vandelay.match_set_point
+ node vandelay.match_set_point,
+ tags_rstore HSTORE
) RETURNS VOID AS $$
DECLARE
jrow TEXT;
my_alias TEXT;
op TEXT;
tagkey TEXT;
+ caseless BOOL;
+ jrow_count INT;
+ my_using TEXT;
+ my_join TEXT;
BEGIN
- IF node.negate THEN
- op := '<>';
+ -- remember $1 is tags_rstore, and $2 is svf_rstore
+
+ caseless := FALSE;
+ SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
+ IF jrow_count > 0 THEN
+ my_using := ' USING (record)';
+ my_join := 'FULL OUTER JOIN';
ELSE
- op := '=';
+ my_using := '';
+ my_join := 'FROM';
END IF;
IF node.tag IS NOT NULL THEN
+ caseless := (node.tag IN ('020', '022', '024'));
tagkey := node.tag;
IF node.subfield IS NOT NULL THEN
tagkey := tagkey || node.subfield;
END IF;
END IF;
+ IF node.negate THEN
+ IF caseless THEN
+ op := 'NOT LIKE';
+ ELSE
+ op := '<>';
+ END IF;
+ ELSE
+ IF caseless THEN
+ op := 'LIKE';
+ ELSE
+ op := '=';
+ END IF;
+ END IF;
+
my_alias := 'n' || node.id::TEXT;
- jrow := 'LEFT JOIN (SELECT *, ' || node.quality ||
- ' AS quality FROM metabib.';
+ jrow := my_join || ' (SELECT *, ';
IF node.tag IS NOT NULL THEN
- jrow := jrow || 'full_rec) ' || my_alias || ' ON (' ||
- my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' ||
+ jrow := jrow || node.quality ||
+ ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
node.tag || '''';
IF node.subfield IS NOT NULL THEN
- jrow := jrow || ' AND ' || my_alias || '.subfield = ''' ||
+ jrow := jrow || ' AND mfr.subfield = ''' ||
node.subfield || '''';
END IF;
- jrow := jrow || ' AND (' || my_alias || '.value ' || op ||
- ' ANY(($1->''' || tagkey || ''')::TEXT[])))';
+ jrow := jrow || ' AND (';
+ jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
+ jrow := jrow || ')) ' || my_alias || my_using || E'\n';
ELSE -- svf
- jrow := jrow || 'record_attr) ' || my_alias || ' ON (' ||
- my_alias || '.id = bre.id AND (' ||
- my_alias || '.attrs->''' || node.svf ||
- ''' ' || op || ' $2->''' || node.svf || '''))';
+ jrow := jrow || 'id AS record, ' || node.quality ||
+ ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
+ node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
+ my_alias || my_using || E'\n';
END IF;
INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
END;
$$ LANGUAGE PLPGSQL;
+CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
+ caseless BOOLEAN,
+ op TEXT,
+ tags_rstore HSTORE,
+ tagkey TEXT
+) RETURNS TEXT AS $$
+DECLARE
+ result TEXT;
+ i INT;
+ vals TEXT[];
+BEGIN
+ i := 1;
+ vals := tags_rstore->tagkey;
+ result := '';
+
+ WHILE TRUE LOOP
+ IF i > 1 THEN
+ IF vals[i] IS NULL THEN
+ EXIT;
+ ELSE
+ result := result || ' OR ';
+ END IF;
+ END IF;
+
+ IF caseless THEN
+ result := result || 'LOWER(mfr.value) ' || op;
+ ELSE
+ result := result || 'mfr.value ' || op;
+ END IF;
+
+ result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
+
+ IF vals[i] IS NULL THEN
+ EXIT;
+ END IF;
+ i := i + 1;
+ END LOOP;
+
+ RETURN result;
+
+END;
+$$ LANGUAGE PLPGSQL;
+
CREATE OR REPLACE FUNCTION vandelay._get_expr_render_one(
node vandelay.match_set_point
) RETURNS TEXT AS $$
CREATE OR REPLACE FUNCTION vandelay.match_bib_record() RETURNS TRIGGER AS $func$
DECLARE
incoming_existing_id TEXT;
- my_bib_queue vandelay.bib_queue%ROWTYPE;
test_result vandelay.match_set_test_result%ROWTYPE;
tmp_rec BIGINT;
+ match_set INT;
BEGIN
+ IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
+ RETURN NEW;
+ END IF;
+
DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id;
+ SELECT q.match_set INTO match_set FROM vandelay.bib_queue q WHERE q.id = NEW.queue;
+
+ IF match_set IS NOT NULL THEN
+ NEW.quality := vandelay.measure_record_quality( NEW.marc, match_set );
+ END IF;
+
-- Perfect matches on 901$c exit early with a match with high quality.
incoming_existing_id :=
oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc);
IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN
SELECT id INTO tmp_rec FROM biblio.record_entry WHERE id = incoming_existing_id::bigint;
IF tmp_rec IS NOT NULL THEN
- INSERT INTO vandelay.bib_match (queued_record, eg_record, quality) VALUES ( NEW.id, incoming_existing_id::bigint, 9999);
- RETURN NEW;
+ INSERT INTO vandelay.bib_match (queued_record, eg_record, match_score, quality)
+ SELECT
+ NEW.id,
+ b.id,
+ 9999,
+ -- note: no match_set means quality==0
+ vandelay.measure_record_quality( b.marc, match_set )
+ FROM biblio.record_entry b
+ WHERE id = incoming_existing_id::bigint;
END IF;
END IF;
- SELECT * INTO my_bib_queue FROM vandelay.bib_queue WHERE id = NEW.queue;
+ IF match_set IS NULL THEN
+ RETURN NEW;
+ END IF;
FOR test_result IN SELECT * FROM
- vandelay.match_set_test_marcxml(my_bib_queue.match_set, NEW.marc) LOOP
+ vandelay.match_set_test_marcxml(match_set, NEW.marc) LOOP
- INSERT INTO vandelay.bib_match (
- matched_set, queued_record, eg_record, match_score, quality
- ) VALUES (
- my_bib_queue.match_set, NEW.id, test_result.record,
- test_result.quality, vandelay.incoming_record_quality(NEW.marc)
- );
+ INSERT INTO vandelay.bib_match ( queued_record, eg_record, match_score, quality )
+ SELECT
+ NEW.id,
+ test_result.record,
+ test_result.quality,
+ vandelay.measure_record_quality( b.marc, match_set )
+ FROM biblio.record_entry b
+ WHERE id = test_result.record;
END LOOP;
END;
$func$ LANGUAGE PLPGSQL;
-CREATE OR REPLACE FUNCTION vandelay.incoming_record_quality ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
+CREATE OR REPLACE FUNCTION vandelay.measure_record_quality ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
DECLARE
out_q INT := 0;
rvalue TEXT;
END IF;
END LOOP;
ELSE
- IF test.value = vandelay.extract_rec_attrs(xml, ARRAY[test.svf]) THEN
+ IF test.value = vandelay.extract_rec_attrs(xml, ARRAY[test.svf]) -> test.svf THEN
out_q := out_q + test.quality;
END IF;
END IF;
if (exists($fields{$f}{match})) {
next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf}));
}
- my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}{sf}};
+ my @new_sf = map { ($_ => $from_field->subfield($_)) } grep { defined($from_field->subfield($_)) } @{$fields{$f}{sf}};
$to_field->add_subfields( @new_sf );
}
}
BEGIN
parsed_target := vandelay.strip_field( target_xml, ''); -- this dance normalizes the format of the xml for the IF below
+ xml_output := parsed_target; -- if there are no replace rules, just return the input
FOR curr_field IN SELECT UNNEST( STRING_TO_ARRAY(field, ',') ) LOOP -- naive split, but it's the same we use in the perl
END;
$$ LANGUAGE PLPGSQL;
-CREATE OR REPLACE FUNCTION vandelay.auto_overlay_bib_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value NUMERIC ) RETURNS BOOL AS $$
+CREATE OR REPLACE FUNCTION vandelay.auto_overlay_bib_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$
DECLARE
eg_id BIGINT;
+ lwm_ratio_value NUMERIC;
BEGIN
- IF lwm_ratio_value IS NULL THEN
- lwm_ratio_value := 0.0;
- END IF;
+ lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0);
PERFORM * FROM vandelay.queued_bib_record WHERE import_time IS NOT NULL AND id = import_id;
SELECT m.eg_record INTO eg_id
FROM vandelay.bib_match m
- JOIN biblio.record_entry r
+ JOIN vandelay.queued_bib_record qr ON (m.queued_record = qr.id)
+ JOIN vandelay.bib_queue q ON (qr.queue = q.id)
+ JOIN biblio.record_entry r ON (r.id = m.eg_record)
WHERE m.queued_record = import_id
- AND r.id = m.eg_record
- AND m.quality::NUMERIC / COALESCE(NULLIF(vandelay.incoming_record_quality(r.marc),0),1)::NUMERIC >= lwm_ratio_value
- ORDER BY m.match_score DESC,
- m.quality::NUMERIC / COALESCE(NULLIF(vandelay.incoming_record_quality(r.marc),0),1)::NUMERIC DESC,
- id
+ AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value
+ ORDER BY m.match_score DESC, -- required match score
+ qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker
+ m.id -- when in doubt, use the first match
LIMIT 1;
IF eg_id IS NULL THEN
- -- RAISE NOTICE 'incoming record is not of hight enough quality';
+ -- RAISE NOTICE 'incoming record is not of high enough quality';
RETURN FALSE;
END IF;
SELECT * FROM vandelay.auto_overlay_bib_queue( $1, NULL );
$$ LANGUAGE SQL;
-CREATE OR REPLACE FUNCTION vandelay.ingest_items ( import_id BIGINT, attr_def_id BIGINT ) RETURNS SETOF vandelay.import_item AS $$
-DECLARE
-
- owning_lib TEXT;
- circ_lib TEXT;
- call_number TEXT;
- copy_number TEXT;
- status TEXT;
- location TEXT;
- circulate TEXT;
- deposit TEXT;
- deposit_amount TEXT;
- ref TEXT;
- holdable TEXT;
- price TEXT;
- barcode TEXT;
- circ_modifier TEXT;
- circ_as_type TEXT;
- alert_message TEXT;
- opac_visible TEXT;
- pub_note TEXT;
- priv_note TEXT;
-
- attr_def RECORD;
- tmp_attr_set RECORD;
- attr_set vandelay.import_item%ROWTYPE;
-
- xpath TEXT;
-
-BEGIN
-
- SELECT * INTO attr_def FROM vandelay.import_item_attr_definition WHERE id = attr_def_id;
-
- IF FOUND THEN
-
- attr_set.definition := attr_def.id;
-
- -- Build the combined XPath
-
- owning_lib :=
- CASE
- WHEN attr_def.owning_lib IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.owning_lib ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.owning_lib || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.owning_lib
- END;
-
- circ_lib :=
- CASE
- WHEN attr_def.circ_lib IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.circ_lib ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circ_lib || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circ_lib
- END;
-
- call_number :=
- CASE
- WHEN attr_def.call_number IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.call_number ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.call_number || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.call_number
- END;
-
- copy_number :=
- CASE
- WHEN attr_def.copy_number IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.copy_number ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.copy_number || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.copy_number
- END;
-
- status :=
- CASE
- WHEN attr_def.status IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.status ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.status || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.status
- END;
-
- location :=
- CASE
- WHEN attr_def.location IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.location ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.location || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.location
- END;
-
- circulate :=
- CASE
- WHEN attr_def.circulate IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.circulate ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circulate || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circulate
- END;
-
- deposit :=
- CASE
- WHEN attr_def.deposit IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.deposit ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.deposit || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.deposit
- END;
-
- deposit_amount :=
- CASE
- WHEN attr_def.deposit_amount IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.deposit_amount ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.deposit_amount || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.deposit_amount
- END;
-
- ref :=
- CASE
- WHEN attr_def.ref IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.ref ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.ref || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.ref
- END;
-
- holdable :=
- CASE
- WHEN attr_def.holdable IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.holdable ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.holdable || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.holdable
- END;
-
- price :=
- CASE
- WHEN attr_def.price IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.price ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.price || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.price
- END;
-
- barcode :=
- CASE
- WHEN attr_def.barcode IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.barcode ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.barcode || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.barcode
- END;
-
- circ_modifier :=
- CASE
- WHEN attr_def.circ_modifier IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.circ_modifier ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circ_modifier || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circ_modifier
- END;
-
- circ_as_type :=
- CASE
- WHEN attr_def.circ_as_type IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.circ_as_type ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.circ_as_type || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.circ_as_type
- END;
-
- alert_message :=
- CASE
- WHEN attr_def.alert_message IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.alert_message ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.alert_message || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.alert_message
- END;
-
- opac_visible :=
- CASE
- WHEN attr_def.opac_visible IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.opac_visible ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.opac_visible || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.opac_visible
- END;
-
- pub_note :=
- CASE
- WHEN attr_def.pub_note IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.pub_note ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.pub_note || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.pub_note
- END;
- priv_note :=
- CASE
- WHEN attr_def.priv_note IS NULL THEN 'null()'
- WHEN LENGTH( attr_def.priv_note ) = 1 THEN '//*[@tag="' || attr_def.tag || '"]/*[@code="' || attr_def.priv_note || '"]'
- ELSE '//*[@tag="' || attr_def.tag || '"]/*' || attr_def.priv_note
- END;
-
-
- xpath :=
- owning_lib || '|' ||
- circ_lib || '|' ||
- call_number || '|' ||
- copy_number || '|' ||
- status || '|' ||
- location || '|' ||
- circulate || '|' ||
- deposit || '|' ||
- deposit_amount || '|' ||
- ref || '|' ||
- holdable || '|' ||
- price || '|' ||
- barcode || '|' ||
- circ_modifier || '|' ||
- circ_as_type || '|' ||
- alert_message || '|' ||
- pub_note || '|' ||
- priv_note || '|' ||
- opac_visible;
-
- -- RAISE NOTICE 'XPath: %', xpath;
-
- FOR tmp_attr_set IN
- SELECT *
- FROM oils_xpath_table( 'id', 'marc', 'vandelay.queued_bib_record', xpath, 'id = ' || import_id )
- AS t( id INT, ol TEXT, clib TEXT, cn TEXT, cnum TEXT, cs TEXT, cl TEXT, circ TEXT,
- dep TEXT, dep_amount TEXT, r TEXT, hold TEXT, pr TEXT, bc TEXT, circ_mod TEXT,
- circ_as TEXT, amessage TEXT, note TEXT, pnote TEXT, opac_vis TEXT )
- LOOP
-
- tmp_attr_set.pr = REGEXP_REPLACE(tmp_attr_set.pr, E'[^0-9\\.]', '', 'g');
- tmp_attr_set.dep_amount = REGEXP_REPLACE(tmp_attr_set.dep_amount, E'[^0-9\\.]', '', 'g');
-
- tmp_attr_set.pr := NULLIF( tmp_attr_set.pr, '' );
- tmp_attr_set.dep_amount := NULLIF( tmp_attr_set.dep_amount, '' );
-
- SELECT id INTO attr_set.owning_lib FROM actor.org_unit WHERE shortname = UPPER(tmp_attr_set.ol); -- INT
- SELECT id INTO attr_set.circ_lib FROM actor.org_unit WHERE shortname = UPPER(tmp_attr_set.clib); -- INT
- SELECT id INTO attr_set.status FROM config.copy_status WHERE LOWER(name) = LOWER(tmp_attr_set.cs); -- INT
-
- SELECT id INTO attr_set.location
- FROM asset.copy_location
- WHERE LOWER(name) = LOWER(tmp_attr_set.cl)
- AND asset.copy_location.owning_lib = COALESCE(attr_set.owning_lib, attr_set.circ_lib); -- INT
-
- attr_set.circulate :=
- LOWER( SUBSTRING( tmp_attr_set.circ, 1, 1)) IN ('t','y','1')
- OR LOWER(tmp_attr_set.circ) = 'circulating'; -- BOOL
-
- attr_set.deposit :=
- LOWER( SUBSTRING( tmp_attr_set.dep, 1, 1 ) ) IN ('t','y','1')
- OR LOWER(tmp_attr_set.dep) = 'deposit'; -- BOOL
-
- attr_set.holdable :=
- LOWER( SUBSTRING( tmp_attr_set.hold, 1, 1 ) ) IN ('t','y','1')
- OR LOWER(tmp_attr_set.hold) = 'holdable'; -- BOOL
-
- attr_set.opac_visible :=
- LOWER( SUBSTRING( tmp_attr_set.opac_vis, 1, 1 ) ) IN ('t','y','1')
- OR LOWER(tmp_attr_set.opac_vis) = 'visible'; -- BOOL
-
- attr_set.ref :=
- LOWER( SUBSTRING( tmp_attr_set.r, 1, 1 ) ) IN ('t','y','1')
- OR LOWER(tmp_attr_set.r) = 'reference'; -- BOOL
-
- attr_set.copy_number := tmp_attr_set.cnum::INT; -- INT,
- attr_set.deposit_amount := tmp_attr_set.dep_amount::NUMERIC(6,2); -- NUMERIC(6,2),
- attr_set.price := tmp_attr_set.pr::NUMERIC(8,2); -- NUMERIC(8,2),
-
- attr_set.call_number := tmp_attr_set.cn; -- TEXT
- attr_set.barcode := tmp_attr_set.bc; -- TEXT,
- attr_set.circ_modifier := tmp_attr_set.circ_mod; -- TEXT,
- attr_set.circ_as_type := tmp_attr_set.circ_as; -- TEXT,
- attr_set.alert_message := tmp_attr_set.amessage; -- TEXT,
- attr_set.pub_note := tmp_attr_set.note; -- TEXT,
- attr_set.priv_note := tmp_attr_set.pnote; -- TEXT,
- attr_set.alert_message := tmp_attr_set.amessage; -- TEXT,
-
- RETURN NEXT attr_set;
-
- END LOOP;
-
- END IF;
-
- RETURN;
-
-END;
-$$ LANGUAGE PLPGSQL;
-
-
CREATE OR REPLACE FUNCTION vandelay.ingest_bib_marc ( ) RETURNS TRIGGER AS $$
DECLARE
value TEXT;
atype TEXT;
adef RECORD;
BEGIN
+ IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
+ RETURN NEW;
+ END IF;
+
FOR adef IN SELECT * FROM vandelay.bib_attr_definition LOOP
SELECT extract_marc_field('vandelay.queued_bib_record', id, adef.xpath, adef.remove) INTO value FROM vandelay.queued_bib_record WHERE id = NEW.id;
END;
$$ LANGUAGE PLPGSQL;
-CREATE OR REPLACE FUNCTION vandelay.ingest_bib_items ( ) RETURNS TRIGGER AS $func$
-DECLARE
- attr_def BIGINT;
- item_data vandelay.import_item%ROWTYPE;
-BEGIN
-
- SELECT item_attr_def INTO attr_def FROM vandelay.bib_queue WHERE id = NEW.queue;
-
- FOR item_data IN SELECT * FROM vandelay.ingest_items( NEW.id::BIGINT, attr_def ) LOOP
- INSERT INTO vandelay.import_item (
- record,
- definition,
- owning_lib,
- circ_lib,
- call_number,
- copy_number,
- status,
- location,
- circulate,
- deposit,
- deposit_amount,
- ref,
- holdable,
- price,
- barcode,
- circ_modifier,
- circ_as_type,
- alert_message,
- pub_note,
- priv_note,
- opac_visible
- ) VALUES (
- NEW.id,
- item_data.definition,
- item_data.owning_lib,
- item_data.circ_lib,
- item_data.call_number,
- item_data.copy_number,
- item_data.status,
- item_data.location,
- item_data.circulate,
- item_data.deposit,
- item_data.deposit_amount,
- item_data.ref,
- item_data.holdable,
- item_data.price,
- item_data.barcode,
- item_data.circ_modifier,
- item_data.circ_as_type,
- item_data.alert_message,
- item_data.pub_note,
- item_data.priv_note,
- item_data.opac_visible
- );
- END LOOP;
-
- RETURN NULL;
-END;
-$func$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay.match_bib_record ( ) RETURNS TRIGGER AS $func$
-DECLARE
- attr RECORD;
- attr_def RECORD;
- eg_rec RECORD;
- id_value TEXT;
- exact_id BIGINT;
+CREATE OR REPLACE FUNCTION vandelay.cleanup_bib_marc ( ) RETURNS TRIGGER AS $$
BEGIN
-
- DELETE FROM vandelay.bib_match WHERE queued_record = NEW.id;
-
- SELECT * INTO attr_def FROM vandelay.bib_attr_definition WHERE xpath = '//*[@tag="901"]/*[@code="c"]' ORDER BY id LIMIT 1;
-
- IF attr_def IS NOT NULL AND attr_def.id IS NOT NULL THEN
- id_value := extract_marc_field('vandelay.queued_bib_record', NEW.id, attr_def.xpath, attr_def.remove);
-
- IF id_value IS NOT NULL AND id_value <> '' AND id_value ~ $r$^\d+$$r$ THEN
- SELECT id INTO exact_id FROM biblio.record_entry WHERE id = id_value::BIGINT AND NOT deleted;
- SELECT * INTO attr FROM vandelay.queued_bib_record_attr WHERE record = NEW.id and field = attr_def.id LIMIT 1;
- IF exact_id IS NOT NULL THEN
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('id', attr.id, NEW.id, exact_id);
- END IF;
- END IF;
- END IF;
-
- IF exact_id IS NULL THEN
- FOR attr IN SELECT a.* FROM vandelay.queued_bib_record_attr a JOIN vandelay.bib_attr_definition d ON (d.id = a.field) WHERE record = NEW.id AND d.ident IS TRUE LOOP
-
- -- All numbers? check for an id match
- IF (attr.attr_value ~ $r$^\d+$$r$) THEN
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE id = attr.attr_value::BIGINT AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('id', attr.id, NEW.id, eg_rec.id);
- END LOOP;
- END IF;
-
- -- Looks like an ISBN? check for an isbn match
- IF (attr.attr_value ~* $r$^[0-9x]+$$r$ AND character_length(attr.attr_value) IN (10,13)) THEN
- FOR eg_rec IN EXECUTE $$SELECT * FROM metabib.full_rec fr WHERE fr.value LIKE evergreen.lowercase('$$ || attr.attr_value || $$%') AND fr.tag = '020' AND fr.subfield = 'a'$$ LOOP
- PERFORM id FROM biblio.record_entry WHERE id = eg_rec.record AND deleted IS FALSE;
- IF FOUND THEN
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('isbn', attr.id, NEW.id, eg_rec.record);
- END IF;
- END LOOP;
-
- -- subcheck for isbn-as-tcn
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE tcn_value = 'i' || attr.attr_value AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('tcn_value', attr.id, NEW.id, eg_rec.id);
- END LOOP;
- END IF;
-
- -- check for an OCLC tcn_value match
- IF (attr.attr_value ~ $r$^o\d+$$r$) THEN
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE tcn_value = regexp_replace(attr.attr_value,'^o','ocm') AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('tcn_value', attr.id, NEW.id, eg_rec.id);
- END LOOP;
- END IF;
-
- -- check for a direct tcn_value match
- FOR eg_rec IN SELECT * FROM biblio.record_entry WHERE tcn_value = attr.attr_value AND deleted IS FALSE LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('tcn_value', attr.id, NEW.id, eg_rec.id);
- END LOOP;
-
- -- check for a direct item barcode match
- FOR eg_rec IN
- SELECT DISTINCT b.*
- FROM biblio.record_entry b
- JOIN asset.call_number cn ON (cn.record = b.id)
- JOIN asset.copy cp ON (cp.call_number = cn.id)
- WHERE cp.barcode = attr.attr_value AND cp.deleted IS FALSE
- LOOP
- INSERT INTO vandelay.bib_match (field_type, matched_attr, queued_record, eg_record) VALUES ('id', attr.id, NEW.id, eg_rec.id);
- END LOOP;
-
- END LOOP;
+ IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
+ RETURN NEW;
END IF;
- RETURN NULL;
-END;
-$func$ LANGUAGE PLPGSQL;
-
-CREATE OR REPLACE FUNCTION vandelay.cleanup_bib_marc ( ) RETURNS TRIGGER AS $$
-BEGIN
DELETE FROM vandelay.queued_bib_record_attr WHERE record = OLD.id;
DELETE FROM vandelay.import_item WHERE record = OLD.id;
AFTER INSERT OR UPDATE ON vandelay.queued_bib_record
FOR EACH ROW EXECUTE PROCEDURE vandelay.ingest_bib_marc();
-CREATE TRIGGER ingest_item_trigger
- AFTER INSERT OR UPDATE ON vandelay.queued_bib_record
- FOR EACH ROW EXECUTE PROCEDURE vandelay.ingest_bib_items();
-
CREATE TRIGGER zz_match_bibs_trigger
- AFTER INSERT OR UPDATE ON vandelay.queued_bib_record
+ BEFORE INSERT OR UPDATE ON vandelay.queued_bib_record
FOR EACH ROW EXECUTE PROCEDURE vandelay.match_bib_record();
remove TEXT NOT NULL DEFAULT ''
);
+CREATE TYPE vandelay.authority_queue_queue_type AS ENUM ('authority');
CREATE TABLE vandelay.authority_queue (
- queue_type TEXT NOT NULL DEFAULT 'authority' CHECK (queue_type = 'authority'),
+ queue_type vandelay.authority_queue_queue_type NOT NULL DEFAULT 'authority',
CONSTRAINT vand_authority_queue_name_once_per_owner_const UNIQUE (owner,name,queue_type)
) INHERITS (vandelay.queue);
ALTER TABLE vandelay.authority_queue ADD PRIMARY KEY (id);
CREATE TABLE vandelay.authority_match (
id BIGSERIAL PRIMARY KEY,
- matched_set INT REFERENCES vandelay.match_set (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
queued_record BIGINT REFERENCES vandelay.queued_authority_record (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
eg_record BIGINT REFERENCES authority.record_entry (id) DEFERRABLE INITIALLY DEFERRED,
quality INT NOT NULL DEFAULT 0
atype TEXT;
adef RECORD;
BEGIN
+ IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
+ RETURN NEW;
+ END IF;
+
FOR adef IN SELECT * FROM vandelay.authority_attr_definition LOOP
SELECT extract_marc_field('vandelay.queued_authority_record', id, adef.xpath, adef.remove) INTO value FROM vandelay.queued_authority_record WHERE id = NEW.id;
CREATE OR REPLACE FUNCTION vandelay.cleanup_authority_marc ( ) RETURNS TRIGGER AS $$
BEGIN
+ IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
+ RETURN NEW;
+ END IF;
+
DELETE FROM vandelay.queued_authority_record_attr WHERE record = OLD.id;
IF TG_OP = 'UPDATE' THEN
RETURN NEW;