From: Jason Etheridge Date: Fri, 10 Apr 2020 20:47:09 +0000 (-0400) Subject: first cut of csv2mysql, mig-convert, and a needed tweak for mig-sql X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=e6feed8797d744a7fdd0965079c2e6f73151ce37 first cut of csv2mysql, mig-convert, and a needed tweak for mig-sql --- diff --git a/kmig.d/bin/csv2mysql b/kmig.d/bin/csv2mysql index 2d74d0a..f131f5b 100755 --- a/kmig.d/bin/csv2mysql +++ b/kmig.d/bin/csv2mysql @@ -1,27 +1,29 @@ #!/usr/bin/perl -w +use strict; +use Switch; +use Env qw( + HOME MYSQL_HOST MYSQL_TCP_PORT MYSQL_USER MYSQL_DATABASE MYSQL_PW + MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use KMig; + use Getopt::Long; use Text::CSV::Auto; use Data::Dumper; -use DBI; use File::Basename; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); my $dbh; my $cfg; my $csv_config; +my $help; sub init { - if ($PGHOST and $PGPORT and $PGUSER and $PGDATABASE) - { - $dbh = connect_db($PGDATABASE,$PGUSER,undef,$PGHOST) or die $DBI::errstr; - } else { - our %config; - eval `cat /openils/conf/offline-config.pl`; - $dbh = DBI->connect( $config{dsn}, $config{usr}, $config{pw} ) or die $DBI::errstr; - } + $dbh = KMig::db_connect(); $cfg = { schema => 'm_foo', @@ -59,23 +61,6 @@ sub init { } } -sub connect_db { - my ($db, $dbuser, $dbpw, $dbhost) = @_; - - my $dsn = "dbi:Pg:host=$dbhost;dbname=$db;port=5432"; - - my $attrs = { - ShowErrorStatement => 1, - RaiseError => 1, - PrintError => 1, - pg_enable_utf8 => 1, - }; - my $dbh = DBI->connect($dsn, $dbuser, $dbpw, $attrs); - - return $dbh; -} - - sub write_sql_sample { my $cfg = shift; my $info = shift; @@ -104,18 +89,20 @@ sub write_sql_table { print "\twriting table definition\n"; if ($cfg->{parent}) { - $cfg->{table_name} = $cfg->{parent} . 
'_legacy'; + if ($cfg->{parent} !~ '^m_') { + die "parent table is not a m_ prefixed table; afraid to drop and recreate"; + } + $cfg->{table_name} = $cfg->{parent}; + print $sql "DROP TABLE IF EXISTS $cfg->{table_name};\n"; + my $prod_table = $cfg->{parent}; $prod_table =~ s/^m_//; + print $sql "CREATE TABLE $cfg->{table_name} LIKE $prod_table;\n"; + print $sql "ALTER TABLE $cfg->{table_name} ADD COLUMN x_migrate BOOLEAN;\n"; } else { - $cfg->{table_name} = lc(basename($fn)); $cfg->{table_name} =~ s/[\-\. ]/_/g; + $cfg->{table_name} = 'm_' . lc(basename($fn)); $cfg->{table_name} =~ s/[\-\. ]/_/g; + print $sql "DROP TABLE IF EXISTS $cfg->{table_name};\n"; + print $sql "CREATE TABLE $cfg->{table_name} (x_migrate BOOLEAN);\n"; } - print $sql "DROP TABLE IF EXISTS $cfg->{schema}.$cfg->{table_name};\n"; - print $sql "CREATE UNLOGGED TABLE $cfg->{schema}.$cfg->{table_name} (\n"; - my $idx = 0; - if ($cfg->{add_x_migrate}) { - print $sql " x_migrate BOOLEAN\n"; - $idx++; - push @indices, 'x_migrate'; - } + push @indices, 'x_migrate'; foreach my $column (@{ $info }) { my $cn = $column->{'header'}; if ($cn =~ /^x_/) { @@ -123,18 +110,13 @@ sub write_sql_table { } my $col_info = Dumper($column); $col_info =~ s/^\$VAR1 = //; - print $sql " " . ($idx++ ? ',' : ' '); + print $sql "ALTER TABLE $cfg->{table_name} ADD COLUMN "; print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; - print $sql "$cn " . ($cn eq 'x_eg_bib_id' ? 'BIGINT' : 'TEXT'); + print $sql "$cn TEXT; "; print $sql " /*\n $col_info */\n"; } - if ($cfg->{parent}) { - print $sql ') INHERITS (' . $cfg->{schema} . '.' . $cfg->{parent} . ");\n"; - } else { - print $sql ");\n"; - } foreach my $cn (@indices) { - print $sql "CREATE INDEX ON $cfg->{schema}.$cfg->{table_name} ($cn);\n"; + print $sql "CREATE INDEX " . $cn . "_idx ON $cfg->{table_name} ($cn);\n"; } } @@ -146,25 +128,28 @@ sub write_sql_loader { my $fn = $cfg->{auto_options}->{file} . 
($cfg->{use_no_headers_file} ? '.no_headers' : ''); print "\twriting copy statement\n"; - print $sql "\n\\COPY $cfg->{schema}.$cfg->{table_name} ("; - my $idx = 0; - foreach my $column (@{ $info }) { - print $sql ($idx++ ? ',' : ''); - print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; - print $sql $column->{'header'}; - } - print $sql ") FROM '$fn'"; + print $sql "\nLOAD DATA LOCAL INFILE '$fn' INTO TABLE $cfg->{table_name} "; if ($auto->csv->sep_char eq chr(9) && ! defined $auto->csv->quote_char && ! defined $auto->csv->escape_char) { # true .tsv, don't treat as csv } elsif ($auto->csv->sep_char eq chr(9)) { # probably good enough .tsv, don't treat as csv } else { - print $sql " WITH csv " . ($cfg->{use_no_headers_file} ? "" : "header"); - print $sql " delimiter " . $dbh->quote( $auto->csv->sep_char ) unless $dbh->quote( $auto->csv->sep_char ) eq 'NULL'; - print $sql " quote " . $dbh->quote( $auto->csv->quote_char ) unless $dbh->quote( $auto->csv->quote_char ) eq 'NULL'; - print $sql " escape " . $dbh->quote( $auto->csv->escape_char ) unless $dbh->quote( $auto->csv->escape_char ) eq 'NULL'; + print $sql "FIELDS"; + print $sql " TERMINATED BY " . $dbh->quote( $auto->csv->sep_char ) unless $dbh->quote( $auto->csv->sep_char ) eq 'NULL'; + print $sql " OPTIONALLY ENCLOSED BY " . $dbh->quote( $auto->csv->quote_char ) unless $dbh->quote( $auto->csv->quote_char ) eq 'NULL'; + print $sql " ESCAPED BY " . $dbh->quote( $auto->csv->escape_char ) unless $dbh->quote( $auto->csv->escape_char ) eq 'NULL'; + if (!$cfg->{use_no_headers_file}) { + print $sql " IGNORE 1 LINES " + } + } + my $idx = 0; + print $sql "("; + foreach my $column (@{ $info }) { + print $sql ($idx++ ? 
',' : ''); + print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; + print $sql $column->{'header'}; } - print $sql "\n"; + print $sql ");\n"; } sub main { diff --git a/kmig.d/bin/mig-convert b/kmig.d/bin/mig-convert index 215cd3f..3976f20 100755 --- a/kmig.d/bin/mig-convert +++ b/kmig.d/bin/mig-convert @@ -6,7 +6,7 @@ mig-convert -Attempts to invoke B<csv2sql> on the .utf8.clean version of the specified +Attempts to invoke B<csv2mysql> on the .utf8.clean version of the specified tracked file, creating either [file].utf8.clean.stage.sql or _stage.sql depending on whether the file has been linked to a parent table within the migration schema or not. @@ -15,7 +15,7 @@ If given no other arguments, the invocation will look like =over 5 -csv2sql --config scripts/clean.conf --add-x-migrate --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean +csv2mysql --config scripts/clean.conf --add-x-migrate --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean =back @@ -23,7 +23,7 @@ otherwise, the arguments will be passed through like so =over 5 -csv2sql [other arguments...] --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean +csv2mysql [other arguments...] --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean =back @@ -38,8 +38,8 @@ B [other arguments...] use strict; use Switch; use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR + HOME MYSQL_HOST MYSQL_TCP_PORT MYSQL_USER MYSQL_DATABASE MYSQL_PW + MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR ); use Pod::Usage; use DBI; @@ -47,12 +47,12 @@ use Cwd 'abs_path'; use FindBin; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; -use EMig; +use KMig; pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; -EMig::die_if_no_env_migschema(); -EMig::die_if_mig_tracking_table_does_not_exist(); +KMig::die_if_no_env_migschema(); +KMig::die_if_mig_tracking_table_does_not_exist(); my $file = abs_path($ARGV[0]); if ($file =~ /^$MIGBASEWORKDIR/) { @@ -70,9 +70,9 @@ sub call_convert_csv { my @args = @_; my $stage_sql_filename; - my $tracked_file_id = EMig::check_for_tracked_file($file); + my $tracked_file_id = KMig::check_for_tracked_file($file); if ($tracked_file_id) { - my $data = EMig::status_this_file($file); + my $data = KMig::status_this_file($file); if (! $data->{'utf8_filename'}) { die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; @@ -110,21 +110,21 @@ sub call_convert_csv { push @args, $stage_sql_filename; print "args: " . join(',',@args) . "\n"; - system('csv2sql', @args, $clean_file); + system($mig_bin . 'csv2mysql', @args, $clean_file); - my $dbh = EMig::db_connect(); + my $dbh = KMig::db_connect(); if (! -e $stage_sql_filename) { print "SQL converted file does not exist: $stage_sql_filename\n"; $stage_sql_filename = ''; } my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file + UPDATE m_tracked_file SET stage_sql_filename = " . $dbh->quote($stage_sql_filename) . " WHERE base_filename = " . $dbh->quote($file) . " ; ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; - EMig::db_disconnect($dbh); + KMig::db_disconnect($dbh); } else { print "File not currently tracked: $file\n"; } diff --git a/kmig.d/bin/mig-sql b/kmig.d/bin/mig-sql index 993826b..f414aa9 100755 --- a/kmig.d/bin/mig-sql +++ b/kmig.d/bin/mig-sql @@ -36,6 +36,7 @@ my @MYARGV = ( ,'--port=' . $MYSQL_TCP_PORT ,'--user=' . $MYSQL_USER ,'--password=' . $MYSQL_PW + ,'--local-infile' ,"--init-command=set \@migschema = \"$MIGSCHEMA\";" ,$MYSQL_DATABASE );