seed kmig-clean with mig-clean
authorJason Etheridge <jason@equinoxinitiative.org>
Fri, 10 Apr 2020 17:51:52 +0000 (13:51 -0400)
committerJason Etheridge <jason@equinoxinitiative.org>
Fri, 10 Apr 2020 17:51:52 +0000 (13:51 -0400)
kmig.d/bin/kmig-clean [new file with mode: 0755]

diff --git a/kmig.d/bin/kmig-clean b/kmig.d/bin/kmig-clean
new file mode 100755 (executable)
index 0000000..b9cb013
--- /dev/null
@@ -0,0 +1,127 @@
+#!/usr/bin/perl -w
+###############################################################################
+=pod
+
+=head1 NAME
+
+mig-clean 
+
+Attempts to invoke B<clean_csv> on the specified tracked file, placing the
+output in [file].clean
+
+If given no other arguments, the invocation will lool like
+
+=over 5
+
+clean_csv --config scripts/clean.conf --fix --apply [--create-headers|--use-headers <hfile>] <file>
+
+=back
+
+otherwise, the arguments will be passed through like so
+
+=over 5
+
+clean_csv [other arguments...] <file>
+
+=back
+
+You'll need to invoke B<mig-iconv> or B<mig-skip-iconv> prior to using commands
+like B<mig-clean>
+
+=head1 SYNOPSIS
+
+B<mig-clean> <file> [other arguments...]
+
+=cut
+
+###############################################################################
+
+use strict;
+use Switch;
+use Env qw(
+    HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
+    MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
+);
+use Pod::Usage;
+use DBI;
+use Cwd 'abs_path';
+use FindBin;
+my $mig_bin = "$FindBin::Bin/";
+use lib "$FindBin::Bin/";
+use Mig;
+
+pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help';
+
+Mig::die_if_no_env_migschema();
+Mig::die_if_mig_tracking_table_does_not_exist();
+
+my $file = abs_path($ARGV[0]);
+if ($file =~ /^$MIGBASEWORKDIR/) {
+    call_clean_csv(@ARGV);
+} else {
+    print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n";
+}
+
+exit 0;
+
+###############################################################################
+
+sub call_clean_csv {
+    my $file = abs_path(shift);
+    my @args = @_;
+
+    my $tracked_file_id = Mig::check_for_tracked_file($file);
+    if ($tracked_file_id) {
+        my $data = Mig::status_this_file($file);
+
+        if (! $data->{'utf8_filename'}) {
+            die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n";
+        }
+
+        my $utf8_file = $data->{'utf8_filename'};
+        if (! -e $utf8_file) {
+            die "missing file: $utf8_file\n";
+        }
+
+        print "cleaning tracked file: $file\n";
+
+        if (scalar(@args) == 0) {
+            @args = (
+                 '--config'
+                ,'scripts/clean.conf'
+                ,'--fix'
+                ,'--apply'
+                ,'--backslash'
+                ,'--pad'
+            );
+            if (! $data->{'has_headers'}) {
+                if ($data->{'headers_file'}) {
+                    push @args, '--use-headers';
+                    push @args, $data->{'headers_file'};
+                } else {
+                    push @args, '--create-headers';
+                }
+            }
+        }
+
+        print join(' ',@args) . "\n";
+        system('clean_csv', @args, $utf8_file);
+
+        my $dbh = Mig::db_connect();
+        my $clean_file = $dbh->quote($utf8_file . '.clean');
+        if (! -e $utf8_file . '.clean') {
+            print "clean file does not exist: $clean_file\n";
+            $clean_file = $dbh->quote('');
+        }
+
+        my $rv = $dbh->do("
+            UPDATE $MIGSCHEMA.tracked_file
+            SET clean_filename = $clean_file
+            WHERE base_filename = " . $dbh->quote($file) . "
+            ;
+        ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n";
+        Mig::db_disconnect($dbh);
+    } else {
+        print "File not currently tracked: $file\n";
+    }
+}