diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 11f3740d99a72..35ade136cc607 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -22,20 +22,16 @@ import scala.util.control.Exception.allCatch import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.expressions.ExprUtils -import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.TimestampFormatter import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { @transient - private lazy val timestampFormatter = TimestampFormatter( + private lazy val timestampParser = TimestampFormatter( options.timestampFormat, options.timeZone, options.locale) - @transient - private lazy val dateFormatter = DateFormatter( - options.dateFormat, - options.locale) private val decimalParser = { ExprUtils.getDecimalParser(options.locale) @@ -108,7 +104,6 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { compatibleType(typeSoFar, tryParseDecimal(field)).getOrElse(StringType) case DoubleType => tryParseDouble(field) case TimestampType => tryParseTimestamp(field) - case DateType => tryParseDate(field) case BooleanType => tryParseBoolean(field) case StringType => StringType case other: DataType => @@ -164,16 +159,9 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { } private def tryParseTimestamp(field: String): DataType = { - if ((allCatch opt timestampFormatter.parse(field)).isDefined) { + // This case infers a custom `dataFormat` is set. + if ((allCatch opt timestampParser.parse(field)).isDefined) { TimestampType - } else { - tryParseDate(field) - } - } - - private def tryParseDate(field: String): DataType = { - if ((allCatch opt dateFormatter.parse(field)).isDefined) { - DateType } else { tryParseBoolean(field) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala index 84b2e616a4426..c2b525ad1a9f8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala @@ -187,22 +187,4 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach(checkDecimalInfer(_, DecimalType(7, 0))) } - - test("inferring date type") { - var options = new CSVOptions(Map("dateFormat" -> "yyyy/MM/dd"), false, "GMT") - var inferSchema = new CSVInferSchema(options) - assert(inferSchema.inferField(NullType, "2018/12/02") == DateType) - - options = new CSVOptions(Map("dateFormat" -> "MMM yyyy"), false, "GMT") - inferSchema = new CSVInferSchema(options) - assert(inferSchema.inferField(NullType, "Dec 2018") == DateType) - - options = new CSVOptions( - Map("dateFormat" -> "yyyy-MM-dd", "timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"), - columnPruning = false, - defaultTimeZoneId = "GMT") - inferSchema = new CSVInferSchema(options) - assert(inferSchema.inferField(NullType, "2018-12-03T11:00:00") == TimestampType) - assert(inferSchema.inferField(NullType, "2018-12-03") == DateType) - } }