From: Jason Etheridge Date: Sun, 24 Mar 2019 16:59:26 +0000 (-0400) Subject: migration_tools.name_parse_out_fuller_last_first_middle_and_random_affix2 X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=4ec23f88c7f20557b13ec6f5be637e726904e114 migration_tools.name_parse_out_fuller_last_first_middle_and_random_affix2 --- diff --git a/sql/base/base.sql b/sql/base/base.sql index 30951e0..1648ea3 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -573,6 +573,98 @@ CREATE OR REPLACE FUNCTION migration_tools.name_parse_out_fuller_last_first_midd END; $$ LANGUAGE PLPGSQL STRICT IMMUTABLE; +CREATE OR REPLACE FUNCTION migration_tools.name_parse_out_fuller_last_first_middle_and_random_affix2 (TEXT) RETURNS TEXT[] AS $$ + DECLARE + full_name TEXT := $1; + temp TEXT; + family_name TEXT := ''; + first_given_name TEXT := ''; + second_given_name TEXT := ''; + suffix TEXT := ''; + prefix TEXT := ''; + BEGIN + temp := BTRIM(full_name); + -- Use values, not structure, for prefix/suffix, unless we come up with a better idea + --IF temp ~ '^\S{2,}\.' THEN + -- prefix := REGEXP_REPLACE(temp, '^(\S{2,}\.).*$','\1'); + -- temp := BTRIM(REGEXP_REPLACE(temp, '^\S{2,}\.(.*)$','\1')); + --END IF; + --IF temp ~ '\S{2,}\.$' THEN + -- suffix := REGEXP_REPLACE(temp, '^.*(\S{2,}\.)$','\1'); + -- temp := REGEXP_REPLACE(temp, '^(.*)\S{2,}\.$','\1'); + --END IF; + IF temp ilike '%MR.%' THEN + prefix := 'Mr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'MR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%MRS.%' THEN + prefix := 'Mrs.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'MRS\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%MS.%' THEN + prefix := 'Ms.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'MS\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%DR.%' THEN + prefix := 'Dr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'DR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%JR.%' THEN + suffix := 'Jr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'JR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%JR,%' THEN + suffix := 'Jr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'JR,\\s*', ',', 'i' )); + END IF; + IF temp ilike '%SR.%' THEN + suffix := 'Sr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'SR\.\\s*', '', 'i' )); + END IF; + IF temp ilike '%SR,%' THEN + suffix := 'Sr.'; + temp := BTRIM(REGEXP_REPLACE( temp, E'SR,\\s*', ',', 'i' )); + END IF; + IF temp like '%III%' THEN + suffix := 'III'; + temp := BTRIM(REGEXP_REPLACE( temp, E'III', '' )); + END IF; + IF temp like '%II%' THEN + suffix := 'II'; + temp := BTRIM(REGEXP_REPLACE( temp, E'II', '' )); + END IF; + + IF temp ~ ',' THEN + family_name = BTRIM(REGEXP_REPLACE(temp,'^(.*?,).*$','\1')); + temp := BTRIM(REPLACE( temp, family_name, '' )); + family_name := REPLACE( family_name, ',', '' ); + IF temp ~ ' ' THEN + first_given_name := BTRIM( REGEXP_REPLACE(temp,'^(.+)\s(.+)$','\1') ); + second_given_name := BTRIM( REGEXP_REPLACE(temp,'^(.+)\s(.+)$','\2') ); + ELSE + first_given_name := temp; + second_given_name := ''; + END IF; + ELSE + IF temp ~ '^\S+\s+\S+\s+\S+$' THEN + first_given_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)\s*(\S+)$','\1') ); + second_given_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)\s*(\S+)$','\2') ); + family_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)\s*(\S+)$','\3') ); + ELSE + first_given_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)$','\1') ); + second_given_name := temp; + family_name := BTRIM( REGEXP_REPLACE(temp,'^(\S+)\s*(\S+)$','\2') ); + END IF; + END IF; + + family_name := BTRIM(REPLACE(REPLACE(family_name,',',''),'"','')); + first_given_name := BTRIM(REPLACE(REPLACE(first_given_name,',',''),'"','')); + second_given_name := BTRIM(REPLACE(REPLACE(second_given_name,',',''),'"','')); + + RETURN ARRAY[ family_name, prefix, first_given_name, second_given_name, suffix ]; + END; +$$ LANGUAGE PLPGSQL STRICT IMMUTABLE; + CREATE OR REPLACE FUNCTION migration_tools.address_parse_out_citystatezip (TEXT) RETURNS TEXT[] AS $$ DECLARE city_state_zip TEXT := $1;