diff --git a/bin/pt-table-sync b/bin/pt-table-sync index 33c4fab20..613e29a89 100755 --- a/bin/pt-table-sync +++ b/bin/pt-table-sync @@ -3612,6 +3612,14 @@ sub make_UPDATE { @cols = $self->sort_cols($row); } my $types = $self->{tbl_struct}->{type_for}; + + foreach my $col ( @cols ) { + my $is_json = ($types->{$col} || '') =~ m/json/i; + if ( $is_json && defined $row->{$col} && not utf8::is_utf8($row->{$col}) ) { + utf8::decode($row->{$col}); + } + } + return "UPDATE $self->{dst_db_tbl} SET " . join(', ', map { my $is_hex = ($types->{$_} || '') =~ m/^0x[0-9a-fA-F]+$/i; @@ -3660,6 +3668,13 @@ sub make_row { my $q = $self->{Quoter}; my $type_for = $self->{tbl_struct}->{type_for}; + foreach my $col ( @cols ) { + my $is_json = ($type_for->{$col} || '') =~ m/json/i; + if ( $is_json && defined $row->{$col} && not utf8::is_utf8($row->{$col}) ) { + utf8::decode($row->{$col}); + } + } + return "$verb INTO $self->{dst_db_tbl}(" . join(', ', map { $q->quote($_) } @cols) . ') VALUES (' diff --git a/lib/ChangeHandler.pm b/lib/ChangeHandler.pm index c09c5148a..51b299187 100644 --- a/lib/ChangeHandler.pm +++ b/lib/ChangeHandler.pm @@ -324,6 +324,16 @@ sub make_UPDATE { @cols = $self->sort_cols($row); } my $types = $self->{tbl_struct}->{type_for}; + + # MySQL uses utf8mb4 for all strings in JSON, but + # DBD::mysql does not decode it accordingly + foreach my $col ( @cols ) { + my $is_json = ($types->{$col} || '') =~ m/json/i; + if ( $is_json && defined $row->{$col} && not utf8::is_utf8($row->{$col}) ) { + utf8::decode($row->{$col}); + } + } + return "UPDATE $self->{dst_db_tbl} SET " . join(', ', map { my $is_hex = ($types->{$_} || '') =~ m/^0x[0-9a-fA-F]+$/i; @@ -403,6 +413,15 @@ sub make_row { my $q = $self->{Quoter}; my $type_for = $self->{tbl_struct}->{type_for}; + # MySQL uses utf8mb4 for all strings in JSON, but + # DBD::mysql does not decode it accordingly + foreach my $col ( @cols ) { + my $is_json = ($type_for->{$col} || '') =~ m/json/i; + if ( $is_json && defined $row->{$col} && not utf8::is_utf8($row->{$col}) ) { + utf8::decode($row->{$col}); + } + } + return "$verb INTO $self->{dst_db_tbl}(" . join(', ', map { $q->quote($_) } @cols) . ') VALUES (' diff --git a/t/lib/ChangeHandler.t b/t/lib/ChangeHandler.t index 311090794..5d7654cf4 100644 --- a/t/lib/ChangeHandler.t +++ b/t/lib/ChangeHandler.t @@ -7,6 +7,7 @@ BEGIN { }; use strict; +use utf8; use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; @@ -538,6 +539,52 @@ SKIP: { ); } +# ############################################################################# +# PT-2377: pt-table-sync must handle utf8 in JSON columns correctly +# ############################################################################# +SKIP: { + skip 'Cannot connect to sandbox master', 1 unless $master_dbh; + $master_dbh->do('DROP TABLE IF EXISTS `test`.`pt-2377`'); + $master_dbh->do('CREATE TABLE `test`.`pt-2377` (id INT, data JSON)'); + $master_dbh->do(q/INSERT INTO `test`.`pt-2377` VALUES (1, '{"name": "Müller"}')/); + $master_dbh->do(q/INSERT INTO `test`.`pt-2377` VALUES (2, NULL)/); + + @rows = (); + $tbl_struct = { + cols => [qw(id data)], + col_posn => {id=>0, data=>1}, + type_for => {id=>'int', data=>'json'}, + }; + $ch = new ChangeHandler( + Quoter => $q, + left_db => 'test', + left_tbl => 'pt-2377', + right_db => 'test', + right_tbl => 'pt-2377', + actions => [ sub { push @rows, $_[0]; } ], + replace => 0, + queue => 0, + tbl_struct => $tbl_struct, + ); + $ch->fetch_back($master_dbh); + + $ch->change('UPDATE', {id=>1}, [qw(id)] ); + $ch->change('INSERT', {id=>1}, [qw(id)] ); + $ch->change('UPDATE', {id=>2}, [qw(id)] ); + $ch->change('INSERT', {id=>2}, [qw(id)] ); + + is_deeply( + \@rows, + [ + q/UPDATE `test`.`pt-2377` SET `data`='{"name": "Müller"}' WHERE `id`='1' LIMIT 1/, + q/INSERT INTO `test`.`pt-2377`(`id`, `data`) VALUES ('1', '{"name": "Müller"}')/, + q/UPDATE `test`.`pt-2377` SET `data`=NULL WHERE `id`='2' LIMIT 1/, + q/INSERT INTO `test`.`pt-2377`(`id`, `data`) VALUES ('2', NULL)/ + ], + "UPDATE and INSERT quote data regardless of how it looks if tbl_struct->quote_val is true" + ); +} + # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-table-sync/pt-2377.t b/t/pt-table-sync/pt-2377.t new file mode 100644 index 000000000..5f93ede39 --- /dev/null +++ b/t/pt-table-sync/pt-2377.t @@ -0,0 +1,80 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use Test::More; + +use PerconaTest; +use Sandbox; +require "$trunk/bin/pt-table-sync"; + +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $source_dbh = $sb->get_dbh_for('source'); +my $replica1_dbh = $sb->get_dbh_for('replica1'); + +if ( !$source_dbh ) { + plan skip_all => 'Cannot connect to sandbox source'; +} +elsif ( !$replica1_dbh ) { + plan skip_all => 'Cannot connect to sandbox replica1'; +} +else { + plan tests => 3; +} + +my $output; + +# ############################################################################# +# Test generated REPLACE statements. +# ############################################################################# +$sb->load_file('source', "t/pt-table-sync/samples/pt-2377.sql"); +$sb->wait_for_replicas(); +$replica1_dbh->do("delete from `test`.`test_table` where `id`=1"); + +$output = remove_traces(output( + sub { pt_table_sync::main('--sync-to-source', + 'h=127.0.0.1,P=12346,u=msandbox,p=msandbox', + qw(-t test.test_table --print --execute)) + }, +)); +chomp($output); +is( + $output, + q/REPLACE INTO `test`.`test_table`(`id`, `data`) VALUES ('1', '{"name": "Müller"}');/, + "UTF8 characters of JSON values are printed correctly in REPLACE statements" +); + +# ############################################################################# +# Test generated UPDATE statements. +# ############################################################################# +$sb->load_file('source', "t/pt-table-sync/samples/pt-2377.sql"); +$sb->wait_for_replicas(); +$replica1_dbh->do(q/update `test`.`test_table` set `data`='{"reaction": "哈哈哈"}' where `id`=2/); + +$output = remove_traces(output( + sub { pt_table_sync::main(qw(--print --execute), + "h=127.0.0.1,P=12346,u=msandbox,p=msandbox,D=test,t=test_table", + "h=127.0.0.1,P=12345,u=msandbox,p=msandbox,D=test,t=test_table"); + } +)); +chomp($output); +is( + $output, + q/UPDATE `test`.`test_table` SET `data`='{"reaction": "哈哈哈"}' WHERE `id`='2' LIMIT 1;/, + "UTF8 characters of JSON values are printed correctly in UPDATE statements" +); + +# ############################################################################# +# Done. +# ############################################################################# +$sb->wipe_clean($source_dbh); +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +exit; diff --git a/t/pt-table-sync/samples/pt-2377.sql b/t/pt-table-sync/samples/pt-2377.sql new file mode 100644 index 000000000..084335118 --- /dev/null +++ b/t/pt-table-sync/samples/pt-2377.sql @@ -0,0 +1,14 @@ +DROP DATABASE IF EXISTS test; +CREATE DATABASE test; +USE test; + +CREATE TABLE `test_table` ( + `id` INT AUTO_INCREMENT PRIMARY KEY, + `data` JSON NOT NULL +) ENGINE=InnoDB; + +INSERT INTO + `test_table` (`data`) +VALUES + ('{"name": "Müller"}'), + ('{"reaction": "哈哈"}');