From f30f192052915811f2446d44f1614d86455e4763 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 13 Feb 2024 07:45:52 -0700 Subject: [PATCH 1/8] Add some basic fuzz testing for cast operations --- .../org/apache/comet/CometCastSuite.scala | 64 ++++++++++++++++++ spark/test.parquet/._SUCCESS.crc | Bin 0 -> 8 bytes ...-a8ac-edeeb67278ce-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a8ac-edeeb67278ce-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...-a8ac-edeeb67278ce-c000.snappy.parquet.crc | Bin 0 -> 12 bytes spark/test.parquet/_SUCCESS | 0 ...4b9b-a8ac-edeeb67278ce-c000.snappy.parquet | Bin 0 -> 451 bytes ...4b9b-a8ac-edeeb67278ce-c000.snappy.parquet | Bin 0 -> 459 bytes ...4b9b-a8ac-edeeb67278ce-c000.snappy.parquet | Bin 0 -> 466 bytes 9 files changed, 64 insertions(+) create mode 100644 spark/src/test/scala/org/apache/comet/CometCastSuite.scala create mode 100644 spark/test.parquet/._SUCCESS.crc create mode 100644 spark/test.parquet/.part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc create mode 100644 spark/test.parquet/.part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc create mode 100644 spark/test.parquet/.part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc create mode 100644 spark/test.parquet/_SUCCESS create mode 100644 spark/test.parquet/part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet create mode 100644 spark/test.parquet/part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet create mode 100644 spark/test.parquet/part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala new file mode 100644 index 000000000..1a319fb61 --- /dev/null +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.comet + +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.{CometTestBase, SaveMode} +import org.apache.spark.sql.functions.col +import org.apache.spark.sql.types.{DataType, DataTypes} + +import scala.util.Random + +class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { + import testImplicits._ + + ignore("cast string to short") { + fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.ShortType) + } + + ignore("cast string to date") { + fuzzTest("0123456789/ \t\r\n", 16, DataTypes.DateType) + } + + ignore("cast string to timestamp") { + castTest(Seq("2020-01-01T12:34:56.123456", "T2"), DataTypes.TimestampType) + fuzzTest("0123456789/:T \t\r\n", 32, DataTypes.TimestampType) + } + + private def genString(r: Random, chars: String, maxLen: Int): String = { + val len = r.nextInt(maxLen) + Range(0,len).map(_ => chars.charAt(r.nextInt(chars.length))).mkString + } + + private def fuzzTest(chars: String, maxLen: Int, toType: DataType) { + val r = new Random(0) + val inputs = Range(0, 10000).map(_ => genString(r, chars, maxLen)) + castTest(inputs, toType) + } + + private def castTest(inputs: Seq[String], toType: DataType) { + val filename = s"/tmp/castTest_${System.currentTimeMillis()}.parquet" + inputs.toDF("str").write.mode(SaveMode.Overwrite).parquet(filename) + val df = spark.read.parquet(filename) + .withColumn("converted", col("str").cast(toType)) + checkSparkAnswer(df) + } + +} diff --git a/spark/test.parquet/._SUCCESS.crc b/spark/test.parquet/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/spark/test.parquet/.part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc b/spark/test.parquet/.part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..f8efc0c13b3d1dc06e40ccf8c63d9a878996238d GIT binary patch literal 12 TcmYc;N@ieSU}89Sy($O*6k`LV literal 0 HcmV?d00001 diff --git a/spark/test.parquet/.part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc b/spark/test.parquet/.part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..50972a93892625371d4a5f3290a2fe87bdb6e240 GIT binary patch literal 12 TcmYc;N@ieSU}CuX;Mh|D73Bn* literal 0 HcmV?d00001 diff --git a/spark/test.parquet/.part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc b/spark/test.parquet/.part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..850582bebba1194868e5fe7c0f2323845cd99c9f GIT binary patch literal 12 TcmYc;N@ieSU}6YqbvO?I5v~Ix literal 0 HcmV?d00001 diff --git a/spark/test.parquet/_SUCCESS b/spark/test.parquet/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/spark/test.parquet/part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet b/spark/test.parquet/part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..50b1444b2978f324a6bae950b1ffe6c0eeaae67f GIT binary patch literal 451 zcmZWm!AiqG5S?zZ1gQt1yJRIuSZHabE=|+aQoML8(nAr!gNSUBZ7rs0o1}`AdiF#7 z3IEDZa1tsWybSZ^&Aj*6om|~G6zB;(rJvs)UxyYY2rJY8IO-4pD3{7uovzD|2{+4m zQ&dVg1K<`j=olM7+#Xfal}er;)8I)j6eU2bbz+v{I*ct`VT4=UY8V!+at|s7M~V_g z*ociA#$_Q}6b8-QsOZrG1ED(s-xa;#W}&x4YfL4$SMl-}xf3_W^B3I$i17O@A|wLVG$0 r106bpZqN&ysUPZ~9|XSZ3_Q=h@IBRWbYF*kzs0TmIg=8=1i$YG>9%V> literal 0 HcmV?d00001 diff --git a/spark/test.parquet/part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet b/spark/test.parquet/part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..089e3c2717b201516997413f7cbf797669ef9c4f GIT binary patch literal 459 zcmZWmO>4qH5S?tTr$XLb?EoU=U36D1Ywby0B0Qn04GKPYtwCe{G4#B zuP+uW|2HZ5@CfQ~EOw)C7@u$sn^ uGwwy9j@({5?1b((h;-Nu!@zTUzVBTGzG}I;tD|mE=l1SR*9O1{zwrlZHEm4* literal 0 HcmV?d00001 diff --git a/spark/test.parquet/part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet b/spark/test.parquet/part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d6805f1db6191c68827f49e2b83c2f024304f1be GIT binary patch literal 466 zcmZWm%SyvQ6ulj3$)X@aXUIU1FwoLM9Ga$&P~5l^=_0sM5RpkTt;IBLlT?w?AMgYG z82>`Q$Xgq6;b!hVbM8IoFcVx{yAc?5PxQ|EA{Y3~{ Date: Tue, 13 Feb 2024 07:46:14 -0700 Subject: [PATCH 2/8] remove test file --- spark/test.parquet/._SUCCESS.crc | Bin 8 -> 0 bytes ...4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc | Bin 12 -> 0 bytes ...4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc | Bin 12 -> 0 bytes ...4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc | Bin 12 -> 0 bytes spark/test.parquet/_SUCCESS | 0 ...94e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet | Bin 451 -> 0 bytes ...94e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet | Bin 459 -> 0 bytes ...94e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet | Bin 466 -> 0 bytes 8 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 spark/test.parquet/._SUCCESS.crc delete mode 100644 spark/test.parquet/.part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc delete mode 100644 spark/test.parquet/.part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc delete mode 100644 spark/test.parquet/.part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc delete mode 100644 spark/test.parquet/_SUCCESS delete mode 100644 spark/test.parquet/part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet delete mode 100644 spark/test.parquet/part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet delete mode 100644 spark/test.parquet/part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet diff --git a/spark/test.parquet/._SUCCESS.crc b/spark/test.parquet/._SUCCESS.crc deleted file mode 100644 index 3b7b044936a890cd8d651d349a752d819d71d22c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8 PcmYc;N@ieSU}69O2$TUk diff --git a/spark/test.parquet/.part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc b/spark/test.parquet/.part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc deleted file mode 100644 index f8efc0c13b3d1dc06e40ccf8c63d9a878996238d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}89Sy($O*6k`LV diff --git a/spark/test.parquet/.part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc b/spark/test.parquet/.part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc deleted file mode 100644 index 50972a93892625371d4a5f3290a2fe87bdb6e240..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}CuX;Mh|D73Bn* diff --git a/spark/test.parquet/.part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc b/spark/test.parquet/.part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet.crc deleted file mode 100644 index 850582bebba1194868e5fe7c0f2323845cd99c9f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}6YqbvO?I5v~Ix diff --git a/spark/test.parquet/_SUCCESS b/spark/test.parquet/_SUCCESS deleted file mode 100644 index e69de29bb..000000000 diff --git a/spark/test.parquet/part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet b/spark/test.parquet/part-00000-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet deleted file mode 100644 index 50b1444b2978f324a6bae950b1ffe6c0eeaae67f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 451 zcmZWm!AiqG5S?zZ1gQt1yJRIuSZHabE=|+aQoML8(nAr!gNSUBZ7rs0o1}`AdiF#7 z3IEDZa1tsWybSZ^&Aj*6om|~G6zB;(rJvs)UxyYY2rJY8IO-4pD3{7uovzD|2{+4m zQ&dVg1K<`j=olM7+#Xfal}er;)8I)j6eU2bbz+v{I*ct`VT4=UY8V!+at|s7M~V_g z*ociA#$_Q}6b8-QsOZrG1ED(s-xa;#W}&x4YfL4$SMl-}xf3_W^B3I$i17O@A|wLVG$0 r106bpZqN&ysUPZ~9|XSZ3_Q=h@IBRWbYF*kzs0TmIg=8=1i$YG>9%V> diff --git a/spark/test.parquet/part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet b/spark/test.parquet/part-00001-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet deleted file mode 100644 index 089e3c2717b201516997413f7cbf797669ef9c4f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 459 zcmZWmO>4qH5S?tTr$XLb?EoU=U36D1Ywby0B0Qn04GKPYtwCe{G4#B zuP+uW|2HZ5@CfQ~EOw)C7@u$sn^ uGwwy9j@({5?1b((h;-Nu!@zTUzVBTGzG}I;tD|mE=l1SR*9O1{zwrlZHEm4* diff --git a/spark/test.parquet/part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet b/spark/test.parquet/part-00002-b15f0510-894e-4b9b-a8ac-edeeb67278ce-c000.snappy.parquet deleted file mode 100644 index d6805f1db6191c68827f49e2b83c2f024304f1be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 466 zcmZWm%SyvQ6ulj3$)X@aXUIU1FwoLM9Ga$&P~5l^=_0sM5RpkTt;IBLlT?w?AMgYG z82>`Q$Xgq6;b!hVbM8IoFcVx{yAc?5PxQ|EA{Y3~{ Date: Tue, 13 Feb 2024 07:46:47 -0700 Subject: [PATCH 3/8] Add comment --- spark/src/test/scala/org/apache/comet/CometCastSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index 1a319fb61..70f350943 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -54,6 +54,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } private def castTest(inputs: Seq[String], toType: DataType) { + //TODO create true temp file and delete after test completes val filename = s"/tmp/castTest_${System.currentTimeMillis()}.parquet" inputs.toDF("str").write.mode(SaveMode.Overwrite).parquet(filename) val df = spark.read.parquet(filename) From 105b768bdcb7f1aca9759de6d2e8dec1df26ec0d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 13 Feb 2024 08:02:32 -0700 Subject: [PATCH 4/8] add more tests --- .../scala/org/apache/comet/CometCastSuite.scala | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index 70f350943..bd8812e62 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -29,10 +29,23 @@ import scala.util.Random class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ + ignore("cast string to bool") { + castTest(Seq("TRUE", "True", "true", "FALSE", "False", "false", "1", "0", ""), DataTypes.BooleanType) + fuzzTest("truefalseTRUEFALSEyesno10 \t\r\n", 8, DataTypes.BooleanType) + } + ignore("cast string to short") { fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.ShortType) } + ignore("cast string to float") { + fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.FloatType) + } + + ignore("cast string to double") { + fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.DoubleType) + } + ignore("cast string to date") { fuzzTest("0123456789/ \t\r\n", 16, DataTypes.DateType) } From 64748e4aca4f608a239d3fe229fe8674f4edc1cd Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Tue, 13 Feb 2024 09:08:24 -0700 Subject: [PATCH 5/8] code cleanup and add a few more tests --- .../org/apache/comet/CometCastSuite.scala | 64 ++++++++++++++----- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index bd8812e62..c48792bb4 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -20,7 +20,7 @@ package org.apache.comet import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import org.apache.spark.sql.{CometTestBase, SaveMode} +import org.apache.spark.sql.{CometTestBase, DataFrame, SaveMode} import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{DataType, DataTypes} @@ -29,30 +29,56 @@ import scala.util.Random class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ + ignore("cast long to short") { + castTest(generateLongs, DataTypes.ShortType) + } + + test("cast float to bool") { + castTest(generateFloats, DataTypes.BooleanType) + } + + test("cast float to int") { + castTest(generateFloats, DataTypes.IntegerType) + } + + ignore("cast float to string") { + castTest(generateFloats, DataTypes.StringType) + } + ignore("cast string to bool") { - castTest(Seq("TRUE", "True", "true", "FALSE", "False", "false", "1", "0", ""), DataTypes.BooleanType) - fuzzTest("truefalseTRUEFALSEyesno10 \t\r\n", 8, DataTypes.BooleanType) + castTest(Seq("TRUE", "True", "true", "FALSE", "False", "false", "1", "0", "").toDF("a"), DataTypes.BooleanType) + fuzzCastFromString("truefalseTRUEFALSEyesno10 \t\r\n", 8, DataTypes.BooleanType) } ignore("cast string to short") { - fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.ShortType) + fuzzCastFromString("0123456789e+- \t\r\n", 8, DataTypes.ShortType) } ignore("cast string to float") { - fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.FloatType) + fuzzCastFromString("0123456789e+- \t\r\n", 8, DataTypes.FloatType) } ignore("cast string to double") { - fuzzTest("0123456789e+- \t\r\n", 8, DataTypes.DoubleType) + fuzzCastFromString("0123456789e+- \t\r\n", 8, DataTypes.DoubleType) } ignore("cast string to date") { - fuzzTest("0123456789/ \t\r\n", 16, DataTypes.DateType) + fuzzCastFromString("0123456789/ \t\r\n", 16, DataTypes.DateType) } ignore("cast string to timestamp") { - castTest(Seq("2020-01-01T12:34:56.123456", "T2"), DataTypes.TimestampType) - fuzzTest("0123456789/:T \t\r\n", 32, DataTypes.TimestampType) + castTest(Seq("2020-01-01T12:34:56.123456", "T2").toDF("a"), DataTypes.TimestampType) + fuzzCastFromString("0123456789/:T \t\r\n", 32, DataTypes.TimestampType) + } + + private def generateFloats = { + val r = new Random(0) + Range(0, 10000).map(_ => r.nextFloat()).toDF("a") + } + + private def generateLongs = { + val r = new Random(0) + Range(0, 10000).map(_ => r.nextLong()).toDF("a") } private def genString(r: Random, chars: String, maxLen: Int): String = { @@ -60,19 +86,23 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { Range(0,len).map(_ => chars.charAt(r.nextInt(chars.length))).mkString } - private def fuzzTest(chars: String, maxLen: Int, toType: DataType) { + private def fuzzCastFromString(chars: String, maxLen: Int, toType: DataType) { val r = new Random(0) val inputs = Range(0, 10000).map(_ => genString(r, chars, maxLen)) - castTest(inputs, toType) + castTest(inputs.toDF("a"), toType) } - private def castTest(inputs: Seq[String], toType: DataType) { - //TODO create true temp file and delete after test completes - val filename = s"/tmp/castTest_${System.currentTimeMillis()}.parquet" - inputs.toDF("str").write.mode(SaveMode.Overwrite).parquet(filename) - val df = spark.read.parquet(filename) - .withColumn("converted", col("str").cast(toType)) + private def castTest(input: DataFrame, toType: DataType) { + val df = roundtripParquet(input) + .withColumn("converted", col("a").cast(toType)) checkSparkAnswer(df) } + private def roundtripParquet(df: DataFrame): DataFrame = { + // TODO create true temp file and delete after test completes + val filename = s"/tmp/castTest_${System.currentTimeMillis()}.parquet" + df.write.mode(SaveMode.Overwrite).parquet(filename) + spark.read.parquet(filename) + } + } From 59632963df6467dbca5bcf1400a2e5cdc595f25e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 15 Feb 2024 08:17:27 -0700 Subject: [PATCH 6/8] formatting and address TODO comment --- .../org/apache/comet/CometCastSuite.scala | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index c48792bb4..32b439c4f 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -19,16 +19,20 @@ package org.apache.comet -import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import java.nio.file.Files + +import scala.util.Random + import org.apache.spark.sql.{CometTestBase, DataFrame, SaveMode} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{DataType, DataTypes} -import scala.util.Random - class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ + private val tempDir = Files.createTempDirectory("CometCastSuite") + ignore("cast long to short") { castTest(generateLongs, DataTypes.ShortType) } @@ -46,7 +50,9 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } ignore("cast string to bool") { - castTest(Seq("TRUE", "True", "true", "FALSE", "False", "false", "1", "0", "").toDF("a"), DataTypes.BooleanType) + castTest( + Seq("TRUE", "True", "true", "FALSE", "False", "false", "1", "0", "").toDF("a"), + DataTypes.BooleanType) fuzzCastFromString("truefalseTRUEFALSEyesno10 \t\r\n", 8, DataTypes.BooleanType) } @@ -83,7 +89,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { private def genString(r: Random, chars: String, maxLen: Int): String = { val len = r.nextInt(maxLen) - Range(0,len).map(_ => chars.charAt(r.nextInt(chars.length))).mkString + Range(0, len).map(_ => chars.charAt(r.nextInt(chars.length))).mkString } private def fuzzCastFromString(chars: String, maxLen: Int, toType: DataType) { @@ -99,8 +105,7 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } private def roundtripParquet(df: DataFrame): DataFrame = { - // TODO create true temp file and delete after test completes - val filename = s"/tmp/castTest_${System.currentTimeMillis()}.parquet" + val filename = tempDir.resolve(s"castTest_${System.currentTimeMillis()}.parquet").toString df.write.mode(SaveMode.Overwrite).parquet(filename) spark.read.parquet(filename) } From 297d5cfd211f909c648da92a7e951b8cd30189be Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 15 Feb 2024 13:02:19 -0700 Subject: [PATCH 7/8] make temp dir more robust --- .../src/test/scala/org/apache/comet/CometCastSuite.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index 32b439c4f..0820143d2 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -31,7 +31,13 @@ import org.apache.spark.sql.types.{DataType, DataTypes} class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ - private val tempDir = Files.createTempDirectory("CometCastSuite") + private lazy val tempDir = { + val tmp = Files.createTempDirectory("CometCastSuite") + if (!tmp.toFile.exists()) { + assert(tmp.toFile.mkdirs()) + } + tmp + } ignore("cast long to short") { castTest(generateLongs, DataTypes.ShortType) From e32a40a28d2e0a70b9c77db1f41a0728f967980a Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 16 Feb 2024 08:43:02 -0700 Subject: [PATCH 8/8] use withTempDir --- .../org/apache/comet/CometCastSuite.scala | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index 0820143d2..565d2264b 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -19,7 +19,7 @@ package org.apache.comet -import java.nio.file.Files +import java.io.File import scala.util.Random @@ -31,14 +31,6 @@ import org.apache.spark.sql.types.{DataType, DataTypes} class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { import testImplicits._ - private lazy val tempDir = { - val tmp = Files.createTempDirectory("CometCastSuite") - if (!tmp.toFile.exists()) { - assert(tmp.toFile.mkdirs()) - } - tmp - } - ignore("cast long to short") { castTest(generateLongs, DataTypes.ShortType) } @@ -105,13 +97,15 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } private def castTest(input: DataFrame, toType: DataType) { - val df = roundtripParquet(input) - .withColumn("converted", col("a").cast(toType)) - checkSparkAnswer(df) + withTempPath { dir => + val df = roundtripParquet(input, dir) + .withColumn("converted", col("a").cast(toType)) + checkSparkAnswer(df) + } } - private def roundtripParquet(df: DataFrame): DataFrame = { - val filename = tempDir.resolve(s"castTest_${System.currentTimeMillis()}.parquet").toString + private def roundtripParquet(df: DataFrame, tempDir: File): DataFrame = { + val filename = new File(tempDir, s"castTest_${System.currentTimeMillis()}.parquet").toString df.write.mode(SaveMode.Overwrite).parquet(filename) spark.read.parquet(filename) }