--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+
+my $VERSION = '1.00';
+
+=pod
+
+NAME
+
+ elect_zips - Utility to elect a winning city/state for each ZIP code based on patron data
+
+USAGE
+
+ psql -U evergreen -A -t -F $'\t' -c 'SELECT city, state, post_code FROM actor.usr_address' > raw-csz.tsv
+ elect_zips < raw-csz.tsv > winning-zips.tsv
+
+NOTES
+
+ Given input like "Miami Springs\tFL\t33166\n" derived from patron addresses,
+ this utility will print a city and state for each zip that has the maximum
+ number of occurrences. (It does not attempt to break ties. If there is a tie,
+ the city and state that reaches the maximum first will end up winning.)
+
+ You can also feed the output of elect_zips directly into I<enrich_zips --db US.txt --makezips>
+
+=cut
+
+my %zips;
+
+# Go through the input and tally the city-state combinations for each ZIP code
+while (<>) {
+ chomp;
+ (my $city, my $state, my $zip) = split(/\t/) or next;
+ next unless $zip =~ m/([\d]{5})/; # If it doesn't have 5 digits in a row, it's not a ZIP
+ $zip =~ s/^([\d]{5}).*$/$1/; # We only want the 5-digit ZIP
+ $state = uc($state);
+ $city =~ s/^\s+//;
+ $city =~ s/\s+$//;
+ $zips{$zip}{"$city\t$state"}++;
+}
+
+# Pick and print a winner for each ZIP code
+foreach(sort keys %zips) {
+ my $zip = $_;
+ my $max = 0;
+ my $citystate = "";
+ foreach(keys %{$zips{$zip}}) {
+ if ($zips{$zip}{$_} > $max) {
+ $max = $zips{$zip}{$_};
+ $citystate = $_;
+ }
+ }
+ print "$citystate\t$zip\n";
+}
--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use Getopt::Long;
+
+my $VERSION = '1.01';
+
+=pod
+
+NAME
+
+ enrich_zips - Utility to add county information to city/state/ZIP data (and optionally use zips.txt format)
+
+USAGE
+
+ enrich_zips --db US.txt < citystatezip.tsv
+ enrich_zips --makezips --db US.txt < citystatezip.tsv > zips.txt
+
+NOTES
+
+ Geonames database can be downloaded from http://download.geonames.org/export/zip/US.zip
+
+=cut
+
+my ($db, $makezips, %zips, $warn);
+my $result = GetOptions ("db=s" => \$db,
+ "makezips" => \$makezips,
+ "warn" => \$warn);
+die
+ "Please specify the location of the Geonames database with --db US.txt\n" .
+ "HINT: You can download it at http://download.geonames.org/export/zip/US.zip" .
+ "\n\nProgram halted"
+unless defined($db);
+
+open DB, $db or die "Couldn't open Geonames database $db: $!\n";
+
+# Slurp in the Geonames database
+while (<DB>) {
+ chomp;
+ my @f = split(/\t/);
+ @{$zips{$f[1]}} = @f[4,2,5];
+ # @{$zips{"33166"}} == ("FL", "Miami Springs", "Miami-Dade")
+}
+
+while (<>) {
+
+ chomp;
+ (my $city, my $state, my $zip) = split(/\t/) or next;
+ my $county = "";
+ my ($dbcity, $dbstate);
+
+ if (defined $zips{$zip}) {
+ ($dbstate, $dbcity, $county) = @{$zips{$zip}};
+ }
+
+ if ($warn) {
+ if (!defined $zips{$zip}) {
+ print STDERR "No county data found for ZIP code $zip ($city, $state)\n\n";
+ next;
+ }
+ if ($city ne $dbcity || $state ne $dbstate) {
+ print STDERR "Patron data input says $zip is $city, $state\n".
+ "Geonames database says $zip is $dbcity, $dbstate\n\n";
+ }
+ }
+
+ if ($makezips) {
+ print "|" . join("|", ($state, $city, $zip, "1", "", $county)) . "||\n";
+ } else {
+ print join("\t", ($city, $state, $zip, $county)) . "\n";
+ }
+}