Skip to content

Commit

Permalink
Merge branch 'fix-other-queries'
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanielks committed Oct 25, 2019
2 parents 61769d6 + 8201399 commit c691faa
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 58 deletions.
63 changes: 40 additions & 23 deletions anonymize-mysqldump.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func setupAndProcessInput(config Config, input io.Reader) chan chan string {
wg.Wait()
close(lines)
}()

return lines
}

Expand Down Expand Up @@ -124,26 +125,34 @@ func processInput(wg *sync.WaitGroup, input io.Reader, lines chan chan string, c
r := bufio.NewReaderSize(input, 2*1024*1024)
var nextLine string
insertStarted := false
for {
continueLooping := true
for continueLooping {
line, err := r.ReadString('\n')

// First, let's check for any non-EOF errors and break
if err != nil && err != io.EOF {
if err == io.EOF {
// continueLooping is used because line might be populated even when we've
// reached the end of the file, so we set a boolean once the last line is
// being processed to end the loop.
continueLooping = false
} else if err != nil {
// log any other errors and break
logrus.Error(err.Error())
break
}

// If the line is empty, just skip it
if len(line) == 0 {
if err == io.EOF {
break
}
// If the line is shorter than 6 characters, which is the shortest line for
// an insert query, let's skip processing it
if len(line) < 6 {

// TODO I'd love to clean this up so we don't make ch in three different
// places, but that's a task for another day
ch := make(chan string)
lines <- ch
ch <- line
//ch <- line + "\n"
continue
}

// clean up whitespace
line = strings.TrimSpace(line)

// Test if this is an INSERT query. We'll use this to determine if we need
// to concatenate lines together when a query is spread across multiple
// lines instead of sitting on a single line
Expand All @@ -152,37 +161,45 @@ func processInput(wg *sync.WaitGroup, input io.Reader, lines chan chan string, c
insertStarted = true
}

line = strings.TrimSpace(line)
// Now that we've detected this is an INSERT query, let's append the lines
// together to form a single line in the event this spans multiple lines
if insertStarted {
nextLine += line
} else {
// When it's not an insert query, let's add this line and move on without
// processing it
// TODO clean this up too
ch := make(chan string)
lines <- ch
ch <- line + "\n"
continue
}

lastCharacter := line[len(line)-1:]
if lastCharacter != ";" && insertStarted {
if lastCharacter == ";" {
insertStarted = false
} else {
// If we haven't reached a query terminator and an insert query has
// begun, let's move on to the next line
continue
}

// Let's reset
insertStarted = false

// Now let's actually process the line!
wg.Add(1)
ch := make(chan string)
lines <- ch

go func(line string) {
defer wg.Done()
line = processLine(line, config)
ch <- line
}(nextLine)

// We wait until the very end to check if EOF because we may have reached
// EOF while `line` still has a value
if err == io.EOF {
logrus.Debug("Reached EOF, finished processing.")
break
}
// Now let's reset nextLine to empty so that it doesn't continue
// appending lines forever
nextLine = ""
}

}

func processLine(line string, config Config) string {
Expand Down Expand Up @@ -321,7 +338,7 @@ func rowObeysConstraints(constraints []PatternFieldConstraint, row sqlparser.Val
logrus.WithFields(logrus.Fields{
"parsedValue": parsedValue,
"constraint.value": constraint.Value,
}).Debug("Debuging constraint obediance: ")
}).Trace("Debuging constraint obediance: ")
if parsedValue != constraint.Value {
return false
}
Expand Down
85 changes: 50 additions & 35 deletions anonymize-mysqldump_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,34 @@ import (
)

var (
jsonConfig Config
jsonConfig Config
dropAndCreateTable = "DROP TABLE IF EXISTS `wp_options`;\n" +
"/*!40101 SET @saved_cs_client = @@character_set_client */;\n" +
"/*!40101 SET character_set_client = utf8 */;\n" +
"CREATE TABLE `wp_options` (\n" +
"`option_id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,\n" +
"`option_name` varchar(191) NOT NULL DEFAULT '',\n" +
"`option_value` longtext NOT NULL,\n" +
"`autoload` varchar(20) NOT NULL DEFAULT 'yes',\n" +
"PRIMARY KEY (`option_id`),\n" +
"UNIQUE KEY `option_name` (`option_name`)\n" +
") ENGINE=InnoDB AUTO_INCREMENT=123 DEFAULT CHARSET=utf8mb4;\n" +
"/*!40101 SET character_set_client = @saved_cs_client */;"

// Don't forget to escape backslashes (write \\n, not \n): an unescaped \n
// turns into a real newline and the string will not pass the comparison test
multilineQuery = `INSERT INTO wp_usermeta VALUES
(1,1,'first_name','John'),(2,1,'last_name','Doe'),
(3,1,'foobar','bazquz'),
(4,1,'nickname','Jim'),
(5,1,'description','Lorum ipsum.');
`
multilineQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Nat'), (2, 1, 'last_name', 'Hermiston'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Treva'), (5, 1, 'description', 'Enim odio nihil.');\n"
commentsQuery = "INSERT INTO `wp_comments` VALUES (1,1,'A WordPress Commenter','[email protected]','https://wordpress.org/','','2019-06-12 00:59:19','2019-06-12 00:59:19','Hi, this is a comment.\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\nCommenter avatars come from <a href=\"https://gravatar.com\">Gravatar</a>.',0,'1','','',0,0);\n"
// Don't forget to escape backslashes (write \\n, not \n): an unescaped \n
// turns into a real newline and the string will not pass the comparison test
commentsQueryRecompiled = "insert into wp_comments values (1, 1, 'sam_harvey', '[email protected]', 'http://balistreriwiegand.name/sunny', '', '2019-06-12 00:59:19', '2019-06-12 00:59:19', 'Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from <a href=\\\"https://gravatar.com\\\">Gravatar</a>.', 0, '1', '', '', 0, 0);\n"
usersQuery = "INSERT INTO `wp_users` VALUES (1,'username','user_pass','username','[email protected]','','2019-06-12 00:59:19','',0,'username'),(2,'username','user_pass','username','[email protected]','http://notreal.com/username','2019-06-12 00:59:19','',0,'username');\n"
usersQueryRecompiled = "insert into wp_users values (1, 'fatima.fisher', 'abOSwkVS', 'lillian', '[email protected]', '', '2019-06-12 00:59:19', '', 0, 'Retta Bailey'), (2, 'juwan.kassulke', 'zgtEQA3nm4Wlro', 'evalyn', '[email protected]', 'http://dickensmurphy.info/ophelia', '2019-06-12 00:59:19', '', 0, 'Rick Fahey III');\n"
userMetaQuery = "INSERT INTO `wp_usermeta` VALUES (1,1,'first_name','John'),(2,1,'last_name','Doe'),(3,1,'foobar','bazquz'),(4,1,'nickname','Jim'),(5,1,'description','Lorum ipsum.'),(6,2,'first_name','Janet'),(7,2,'last_name','Doe'),(8,2,'foobar','bazquz'),(9,2,'nickname','Jane'),(10,2,'description','Lorum ipsum.');\n"
userMetaQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Ed'), (2, 1, 'last_name', 'Koelpin'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Watson'), (5, 1, 'description', 'Qui voluptatum est.'), (6, 2, 'first_name', 'Olen'), (7, 2, 'last_name', 'Williamson'), (8, 2, 'foobar', 'bazquz'), (9, 2, 'nickname', 'Kamren'), (10, 2, 'description', 'Eveniet repellat in.');\n"
(5,1,'description','Lorum ipsum.');`
multilineQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Jazmyn'), (2, 1, 'last_name', 'Reynolds'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Sherman'), (5, 1, 'description', 'Vel at et.');\n"
commentsQuery = "INSERT INTO `wp_comments` VALUES (1,1,'A WordPress Commenter','[email protected]','https://wordpress.org/','','2019-06-12 00:59:19','2019-06-12 00:59:19','Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from <a href=\\\"https://gravatar.com\\\">Gravatar</a>.',0,'1','','',0,0);\n"
commentsQueryRecompiled = "insert into wp_comments values (1, 1, 'kamren.ohara', '[email protected]', 'http://ebert.com/korey_keeling', '', '2019-06-12 00:59:19', '2019-06-12 00:59:19', 'Hi, this is a comment.\\nTo get started with moderating, editing, and deleting comments, please visit the Comments screen in the dashboard.\\nCommenter avatars come from <a href=\\\"https://gravatar.com\\\">Gravatar</a>.', 0, '1', '', '', 0, 0);\n"
usersQuery = "INSERT INTO `wp_users` VALUES (1,'username','user_pass','username','[email protected]','','2019-06-12 00:59:19','',0,'username'),(2,'username','user_pass','username','[email protected]','http://notreal.com/username','2019-06-12 00:59:19','',0,'username');\n"
usersQueryRecompiled = "insert into wp_users values (1, 'treva_cremin', 'NjaK5HeMAMuv', 'hailey', '[email protected]', '', '2019-06-12 00:59:19', '', 0, 'Kylie Rice'), (2, 'eduardo', 'J3JRQ4XoIxXX6A', 'albert.okeefe', '[email protected]', 'http://pfannerstill.net/brando', '2019-06-12 00:59:19', '', 0, 'Ardella Jenkins PhD');\n"
userMetaQuery = "INSERT INTO `wp_usermeta` VALUES (1,1,'first_name','John'),(2,1,'last_name','Doe'),(3,1,'foobar','bazquz'),(4,1,'nickname','Jim'),(5,1,'description','Lorum ipsum.'),(6,2,'first_name','Janet'),(7,2,'last_name','Doe'),(8,2,'foobar','bazquz'),(9,2,'nickname','Jane'),(10,2,'description','Lorum ipsum.');\n"
userMetaQueryRecompiled = "insert into wp_usermeta values (1, 1, 'first_name', 'Stephania'), (2, 1, 'last_name', 'Hamill'), (3, 1, 'foobar', 'bazquz'), (4, 1, 'nickname', 'Noah'), (5, 1, 'description', 'Dolorum nostrum alias.'), (6, 2, 'first_name', 'Ed'), (7, 2, 'last_name', 'Koelpin'), (8, 2, 'foobar', 'bazquz'), (9, 2, 'nickname', 'Watson'), (10, 2, 'description', 'Qui voluptatum est.');\n"
)

func init() {
Expand All @@ -38,50 +50,53 @@ func BenchmarkProcessLine(b *testing.B) {
}
}

// TestProcessFile runs multilineQuery through setupAndProcessInput and
// compares the output with multilineQueryRecompiled.
func TestProcessFile(t *testing.T) {
input := bytes.NewBufferString(multilineQuery)

lines := setupAndProcessInput(jsonConfig, input)

// Each received value overwrites the previous one, so only the last
// value read from the channels is kept for the comparison below.
var result string
for line := range lines {
result = <-line
}

if result != multilineQueryRecompiled {
t.Error("\nExpected:\n", multilineQueryRecompiled, "\nActual:\n", result)
}
}

func TestApplyConfigToQuery(t *testing.T) {
func TestSetupAndProcessInput(t *testing.T) {

var tests = []struct {
testName string
line string
query string
wants string
}{
{
testName: "users query",
line: usersQuery,
query: usersQuery,
wants: usersQueryRecompiled,
},
{
testName: "usermeta query",
line: userMetaQuery,
query: userMetaQuery,
wants: userMetaQueryRecompiled,
},
{
testName: "comments query",
line: commentsQuery,
query: commentsQuery,
wants: commentsQueryRecompiled,
},
{
testName: "multiline query",
query: multilineQuery,
wants: multilineQueryRecompiled,
},
{
testName: "table creation",
query: dropAndCreateTable,
wants: dropAndCreateTable + "\n",
},
}

for _, test := range tests {
t.Run(test.testName, func(t *testing.T) {
line := processLine(test.line, jsonConfig)
if line != test.wants {
t.Error("\nExpected:\n", test.wants, "\nActual:\n", line)

input := bytes.NewBufferString(test.query)
lines := setupAndProcessInput(jsonConfig, input)

var result string
for line := range lines {
result += <-line
}

if result != test.wants {
t.Error("\nExpected:\n", test.wants, "\nActual:\n", result)
}
})
}
Expand Down

0 comments on commit c691faa

Please sign in to comment.