Skip to content

Commit

Permalink
Add benchmark with multiple join columns
Browse files — browse the repository at this point in the history
  • Loading branch information
zeotuan committed Dec 10, 2024
1 parent 37eca14 commit ba7c5cb
Showing 1 changed file with 12 additions and 7 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,7 @@ import java.util.concurrent.TimeUnit
import scala.util.Try

private class DatasetComparerBenchmark extends DatasetComparer {
lazy val spark: SparkSession = {
def getSparkSession: SparkSession = {
val session = SparkSession
.builder()
.master("local")
Expand All @@ -26,8 +26,9 @@ private class DatasetComparerBenchmark extends DatasetComparer {
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
def assertLargeDatasetEqualityV2(blackHole: Blackhole): Boolean = {
val ds1 = spark.range(0, 1000000, 1, 8)
val ds3 = ds1
val spark = getSparkSession
val ds1 = spark.range(0, 1000000, 1, 8)
val ds3 = ds1

val result = Try(assertLargeDatasetEqualityV2(ds1, ds3))

Expand All @@ -42,8 +43,9 @@ private class DatasetComparerBenchmark extends DatasetComparer {
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
def assertLargeDatasetEqualityV2WithSinglePrimaryKey(blackHole: Blackhole): Boolean = {
val ds1 = spark.range(0, 1000000, 1, 8)
val ds3 = ds1
val spark = getSparkSession
val ds1 = spark.range(0, 1000000, 1, 8)
val ds3 = ds1

val result = Try(assertLargeDatasetEqualityV2(ds1, ds3, primaryKeys = Seq("id")))

Expand All @@ -58,8 +60,9 @@ private class DatasetComparerBenchmark extends DatasetComparer {
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
def assertLargeDatasetEquality(blackHole: Blackhole): Boolean = {
val ds1 = spark.range(0, 1000000, 1, 8)
val ds3 = ds1
val spark = getSparkSession
val ds1 = spark.range(0, 1000000, 1, 8)
val ds3 = ds1

val result = Try(assertLargeDatasetEquality(ds1, ds3))

Expand All @@ -74,6 +77,7 @@ private class DatasetComparerBenchmark extends DatasetComparer {
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
def assertLargeDatasetEqualityV2WithCompositePrimaryKey2(blackHole: Blackhole): Boolean = {
val spark = getSparkSession
val ds1 = spark.range(0, 1000000, 1, 8).withColumn("id2", col("id") + 1)
val ds3 = ds1
val result = Try(assertLargeDatasetEqualityV2(ds1, ds3, primaryKeys = Seq("id", "id2")))
Expand All @@ -89,6 +93,7 @@ private class DatasetComparerBenchmark extends DatasetComparer {
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
def assertLargeDatasetEqualityV2WithCompositePrimaryKey3(blackHole: Blackhole): Boolean = {
val spark = getSparkSession
val ds1 = spark.range(0, 1000000, 1, 8).withColumn("id2", col("id") + 1).withColumn("id3", col("id2") + 1)
val ds3 = ds1
val result = Try(assertLargeDatasetEqualityV2(ds1, ds3, primaryKeys = Seq("id", "id2", "id3")))
Expand Down

0 comments on commit ba7c5cb

Please sign in to comment.