Skip to content

Commit

Permalink
save progress
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove committed May 2, 2024
1 parent 23847cd commit 4bf6c16
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 16 deletions.
4 changes: 3 additions & 1 deletion common/src/main/scala/org/apache/comet/CometConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,9 @@ object CometConf {

val COMET_CAST_ALLOW_INCOMPATIBLE: ConfigEntry[Boolean] =
conf("spark.comet.cast.allowIncompatible")
.doc("Comet is not currently fully compatible with Spark for all cast operations.")
.doc(
"Comet is not currently fully compatible with Spark for all cast operations. Set this config " +
"to true to allow them anyway. See compatibility guide for more information.")
.booleanConf
// TODO change this to false and set this config explicitly in tests where needed
.createWithDefault(true)
Expand Down
20 changes: 11 additions & 9 deletions spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
Original file line number Diff line number Diff line change
Expand Up @@ -62,16 +62,14 @@ object CometCast {
Incompatible
case (DataTypes.TimestampType, DataTypes.LongType) =>
Incompatible
case (DataTypes.BinaryType | DataTypes.FloatType, DataTypes.StringType) =>
Incompatible
case (DataTypes.StringType, DataTypes.BinaryType) =>
case (DataTypes.BinaryType, DataTypes.StringType) =>
Incompatible
case (_: DecimalType, _: DecimalType) =>
// TODO we need to file an issue for adding specific tests for casting
// between decimal types with different precision and scale
Compatible
Incompatible
case (DataTypes.StringType, _) =>
canCastFromString(toType, timeZoneId)
canCastFromString(toType, timeZoneId, evalMode)
case (_, DataTypes.StringType) =>
canCastToString(fromType)
case (DataTypes.TimestampType, _) =>
Expand All @@ -92,7 +90,10 @@ object CometCast {
}
}

private def canCastFromString(toType: DataType, timeZoneId: Option[String]): SupportLevel = {
private def canCastFromString(
toType: DataType,
timeZoneId: Option[String],
evalMode: String): SupportLevel = {
toType match {
case DataTypes.BooleanType =>
Compatible
Expand All @@ -111,8 +112,9 @@ object CometCast {
// https://github.com/apache/datafusion-comet/issues/327
Unsupported
case DataTypes.TimestampType if !timeZoneId.contains("UTC") =>
UnsupportedWithReason(
s"Unsupported timezone $timeZoneId for cast from string to timestamp")
UnsupportedWithReason(s"Unsupported timezone $timeZoneId")
case DataTypes.TimestampType if evalMode == "ANSI" =>
UnsupportedWithReason(s"ANSI mode not supported")
case DataTypes.TimestampType =>
// https://github.com/apache/datafusion-comet/issues/328
Incompatible
Expand All @@ -131,7 +133,7 @@ object CometCast {
case DataTypes.TimestampType => Compatible
case DataTypes.FloatType | DataTypes.DoubleType =>
// https://github.com/apache/datafusion-comet/issues/326
Unsupported
Incompatible
case _ => Unsupported
}
}
Expand Down
15 changes: 9 additions & 6 deletions spark/src/test/scala/org/apache/comet/CometCastSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
} else if (!testExists) {
fail(s"Missing test: $expectedTestName")
}
} else if (testExists) {
fail(s"Found test for cast that Spark does not support: $expectedTestName")
}
}
}
Expand Down Expand Up @@ -543,10 +541,15 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {

test("cast StringType to TimestampType disabled by default") {
val values = Seq("2020-01-01T12:34:56.123456", "T2").toDF("a")
castFallbackTest(
values.toDF("a"),
DataTypes.TimestampType,
"spark.comet.cast.stringToTimestamp is disabled")
castFallbackTest(values.toDF("a"), DataTypes.TimestampType, "Unsupported timezone")
}

ignore("cast StringType to TimestampType (fuzz test)") {
// https://github.com/apache/datafusion-comet/issues/328
withSQLConf((CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key, "true")) {
val values = Seq("2020-01-01T12:34:56.123456", "T2") ++ generateStrings(timestampPattern, 8)
castTest(values.toDF("a"), DataTypes.TimestampType)
}
}

test("cast StringType to TimestampType") {
Expand Down

0 comments on commit 4bf6c16

Please sign in to comment.