diff --git a/fuzz-testing/README.md b/fuzz-testing/README.md
index 40eff8520..5ff127dfa 100644
--- a/fuzz-testing/README.md
+++ b/fuzz-testing/README.md
@@ -43,7 +43,7 @@ Set appropriate values for `SPARK_HOME`, `SPARK_MASTER`, and `COMET_JAR` environ
 $SPARK_HOME/bin/spark-submit \
     --master $SPARK_MASTER \
     --class org.apache.comet.fuzz.Main \
-    target/comet-fuzz-0.1.0-SNAPSHOT-jar-with-dependencies.jar \
+    target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \
     data --num-files=2 --num-rows=200 --num-columns=100
 ```
 
@@ -55,7 +55,7 @@ Generate random queries that are based on the available test files.
 $SPARK_HOME/bin/spark-submit \
     --master $SPARK_MASTER \
     --class org.apache.comet.fuzz.Main \
-    target/cometfuzz-0.1.0-SNAPSHOT-jar-with-dependencies.jar \
+    target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \
     queries --num-files=2 --num-queries=500
 ```
 
@@ -76,7 +76,7 @@ $SPARK_HOME/bin/spark-submit \
     --jars $COMET_JAR \
     --driver-class-path $COMET_JAR \
     --class org.apache.comet.fuzz.Main \
-    target/cometfuzz-0.1.0-SNAPSHOT-jar-with-dependencies.jar \
+    target/comet-fuzz-spark3.4_2.12-0.1.0-SNAPSHOT-jar-with-dependencies.jar \
     run --num-files=2 --filename=queries.sql
 ```
 
diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala
index da59dbb5b..a8ef117bb 100644
--- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala
+++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryGen.scala
@@ -133,15 +133,19 @@ object QueryGen {
     val args = Range(0, func.num_args)
       // TODO support using literals as well as columns
       .map(_ => Utils.randomChoice(table.columns, r))
+
+    // TODO avoid grouping and sorting on floating-point columns
     val groupingCols = Range(0, 2).map(_ => Utils.randomChoice(table.columns, r))
+
     if (groupingCols.isEmpty) {
       s"SELECT ${args.mkString(", ")}, ${func.name}(${args.mkString(", ")}) AS x " +
-        s"FROM $tableName" +
+        s"FROM $tableName " +
+        // TODO avoid sorting on floating-point columns
         s"ORDER BY ${args.mkString(", ")}"
     } else {
       s"SELECT ${groupingCols.mkString(", ")}, ${func.name}(${args.mkString(", ")}) " +
         s"FROM $tableName " +
-        s"GROUP BY ${groupingCols.mkString(",")}" +
+        s"GROUP BY ${groupingCols.mkString(",")} " +
         s"ORDER BY ${groupingCols.mkString(", ")}"
     }
   }
@@ -157,6 +161,7 @@ object QueryGen {
 
     s"SELECT ${args.mkString(", ")}, ${func.name}(${args.mkString(", ")}) AS x " +
       s"FROM $tableName " +
+      // TODO avoid sorting on floating-point columns
       s"ORDER BY ${args.mkString(", ")}"
   }
 
@@ -175,11 +180,18 @@ object QueryGen {
     val joinTypes = Seq(("INNER", 0.4), ("LEFT", 0.3), ("RIGHT", 0.3))
     val joinType = Utils.randomWeightedChoice(joinTypes)
 
-    "SELECT * " +
-      s"FROM $leftTableName " +
-      s"$joinType JOIN $rightTableName " +
-      s"ON $leftTableName.$leftCol = $rightTableName.$rightCol " +
-      s"ORDER BY $leftCol;"
+    // TODO avoid sorting on floating-point columns
+    val leftColProjection = leftTable.columns.map(c => s"l.$c").mkString(", ")
+    val rightColProjection = rightTable.columns.map(c => s"r.$c").mkString(", ")
+    s"SELECT " +
+      s"$leftColProjection, " +
+      s"$rightColProjection " +
+      s"FROM $leftTableName l " +
+      s"$joinType JOIN $rightTableName r " +
+      s"ON l.$leftCol = r.$rightCol " +
+      s"ORDER BY " +
+      s"$leftColProjection, " +
+      s"$rightColProjection;"
   }
 
 }
diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
index 82e681e73..8d0c2828f 100644
--- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
+++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
@@ -78,7 +78,11 @@ object QueryRunner {
           assert(l.length == r.length)
           for (j <- 0 until l.length) {
             val same = (l(j), r(j)) match {
+              case (a: Float, b: Float) if a.isInfinity => b.isInfinity
+              case (a: Float, b: Float) if a.isNaN => b.isNaN
               case (a: Float, b: Float) => (a - b).abs <= 0.000001f
+              case (a: Double, b: Double) if a.isInfinity => b.isInfinity
+              case (a: Double, b: Double) if a.isNaN => b.isNaN
               case (a: Double, b: Double) => (a - b).abs <= 0.000001
               case (a, b) => a == b
             }
@@ -87,7 +91,7 @@ object QueryRunner {
               showPlans(w, sparkPlan, cometPlan)
               w.write(s"First difference at row $i:\n")
               w.write("Spark: `" + l.mkString(",") + "`\n")
-              w.write("Comet: `" + r.mkString(", ") + "`\n")
+              w.write("Comet: `" + r.mkString(",") + "`\n")
               i = sparkRows.length
             }
           }