Skip to content

Commit

Permalink
Add a snakifyColumns method that uses the snakify method defined in t…
Browse files Browse the repository at this point in the history
…he Lift web development framework
  • Loading branch information
MrPowers committed Jan 24, 2019
1 parent 308e65b commit 687c1f5
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.github.mrpowers.spark.daria.sql

import org.apache.spark.sql.Column
import org.apache.spark.sql.{functions=>F}
import org.apache.spark.sql.{functions => F}

object FunctionsAsColumnExt {

Expand All @@ -14,7 +14,12 @@ object FunctionsAsColumnExt {

/**
 * Lower-cases the column by passing Spark's `functions.lower` (aliased as `F`) to the helper `t`.
 *
 * NOTE(review): `t` is defined outside this excerpt; presumably it applies the given
 * function to the wrapped column - confirm.
 */
def lower(): Column = t(F.lower)

def regexp_replace(pattern: String, replacement: String): Column = F.regexp_replace(col, pattern,replacement)
/**
 * Delegates to Spark's `functions.regexp_replace` (aliased as `F`), passing `col`
 * together with the given pattern and replacement.
 *
 * NOTE(review): `col` is defined outside this excerpt; presumably the column being extended - confirm.
 *
 * @param pattern     regular expression forwarded to `F.regexp_replace`
 * @param replacement replacement string forwarded to `F.regexp_replace`
 * @return the Column produced by `F.regexp_replace`
 */
def regexp_replace(pattern: String, replacement: String): Column =
F.regexp_replace(
col,
pattern,
replacement
)

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,36 @@ object transformations {
}
}

/**
 * Renames every column of a DataFrame by passing its name through
 * `StringHelpers.snakify`.
 *
 * {{{
 * import com.github.mrpowers.spark.daria.sql.transformations._
 *
 * val sourceDf = Seq(
 *   ("funny", "joke")
 * ).toDF("ThIs", "BiH")
 *
 * val actualDf = sourceDf.transform(snakifyColumns())
 *
 * actualDf.show()
 *
 * +-----+----+
 * |th_is|bi_h|
 * +-----+----+
 * |funny|joke|
 * +-----+----+
 * }}}
 *
 * @param df the DataFrame whose columns are renamed
 * @return a DataFrame in which each original column name has been replaced by its snakified form
 */
def snakifyColumns()(df: DataFrame): DataFrame = {
  // Thread the DataFrame through one rename per original column name.
  df.columns.foldLeft(df) { (memoDF, colName) =>
    memoDF.withColumnRenamed(
      colName,
      com.github.mrpowers.spark.daria.utils.StringHelpers.snakify(colName)
    )
  }
}

/**
* Convert camel case columns to snake case
* Example: SomeColumn -> some_column
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,29 @@ object StringHelpers {
.toLowerCase
}

/**
 * Copied from the Lift Framework https://github.com/lift/framework/blob/master/core/util/src/main/scala/net/liftweb/util/StringHelpers.scala
 *
 * Turn a string of format "FooBar" into snake case "foo_bar"
 *
 * Note: snakify is not reversible, ie. in general the following will _not_ be true:
 *
 * s == camelify(snakify(s))
 *
 * Lower-casing uses `Locale.ROOT`, so the result does not depend on the JVM's default locale.
 *
 * @param name the string to convert
 * @return the underscored string
 */
def snakify(name: String): String =
  name
    // Split a run of capitals from a following capitalized word: "HTMLParser" -> "HTML_Parser"
    .replaceAll(
      "([A-Z]+)([A-Z][a-z])",
      "$1_$2"
    )
    // Split a lower-case letter or digit from a following capital: "fooBar" -> "foo_Bar"
    .replaceAll(
      "([a-z\\d])([A-Z])",
      "$1_$2"
    )
    .toLowerCase(java.util.Locale.ROOT)

def camelCaseToSnakeCase(str: String): String = {
str
.replaceAll(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,33 @@ object TransformationsTest extends TestSuite with DataFrameComparer with ColumnC

}

'snakifyColumns - {

  // Single-row input whose column names mix upper and lower case.
  val inputDF = spark.createDF(
    List(("funny", "joke")),
    List(("ThIs", StringType, true), ("BiH", StringType, true))
  )

  // Same row, with the column names in their snakified form.
  val wantedDF = spark.createDF(
    List(("funny", "joke")),
    List(("th_is", StringType, true), ("bi_h", StringType, true))
  )

  val renamedDF = inputDF.transform(transformations.snakifyColumns())

  assertSmallDataFrameEquality(renamedDF, wantedDF)

}

'camelCaseToSnakeCaseColumns - {
"convert camel case columns to snake case" - {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package com.github.mrpowers.spark.daria.utils

import utest._

class ArrayHelpersSpec extends TestSuite {
object ArrayHelpersTest extends TestSuite {

val tests = Tests {

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.github.mrpowers.spark.daria.utils

import utest._

/** utest suite exercising the string utilities exposed by `StringHelpers`. */
object StringHelpersTest extends TestSuite {

  val tests = Tests {

    'escapeForSqlRegexp - {
      // (input, expected result) pairs, checked in order; the first mismatch fails the test.
      val expectations: List[(String, Option[String])] = List(
        ("D/E", Some("D\\/E")),
        ("(E/F)", Some("\\(E\\/F\\)")),
        ("", Some("")),
        ("E|G", Some("E\\|G")),
        ("E;;G", Some("E;;G")),
        ("^AB-C", Some("^AB\\-C")),
        ("^AB+C", Some("^AB\\+C")),
        (null, None)
      )
      expectations.foreach { case (raw, escaped) =>
        assert(StringHelpers.escapeForSqlRegexp(raw) == escaped)
      }
    }

    'toSnakeCase - {
      val converted = StringHelpers.toSnakeCase("A b C")
      assert(converted == "a_b_c")
    }

    'snakify - {
      val converted = StringHelpers.snakify("SomeColumn")
      assert(converted == "some_column")
    }

    'camelCaseToSnakeCase - {
      val converted = StringHelpers.camelCaseToSnakeCase("thisIsCool")
      assert(converted == "this_is_cool")
    }

  }

}

0 comments on commit 687c1f5

Please sign in to comment.