Skip to content

Commit

Permalink
Latest results for thesis manuscript
Browse files Browse the repository at this point in the history
  • Loading branch information
gillesmag committed May 29, 2024
1 parent 5f784d1 commit 6eeb7a2
Show file tree
Hide file tree
Showing 43 changed files with 25,662 additions and 10,246 deletions.
20 changes: 13 additions & 7 deletions lib/src/main/scala/benchmark/Benchmark.scala
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ object Benchmark {
CaptureFilter(
provenanceFilter = ProvenancePredicate(
nodePredicate = ProvenanceGraph.allNodes,
edgePredicate = provenanceFilter(description.setup)
edgePredicate =
provenanceFilter(description.setup, description.algorithm)
),
dataFilter = dataFilter(gl, description.setup, description.algorithm)
dataFilter = dataFilter(description.setup, description.algorithm)
)
)

Expand Down Expand Up @@ -182,7 +183,7 @@ object Benchmark {
)

// Clean up the lineage folder once we are done with it
// fs.delete(lineagePath, true)
fs.delete(lineagePath, true)
}

def computeFlags(expSetup: ExperimentSetup): (Boolean, Boolean) = {
Expand All @@ -201,7 +202,6 @@ object Benchmark {
}

def dataFilter(
gl: GraphLineage[Unit, Double],
experimentSetup: ExperimentSetup,
algorithm: GraphAlgorithm
): DataPredicate = {
Expand Down Expand Up @@ -239,14 +239,20 @@ object Benchmark {
}
}

def provenanceFilter(expSetup: ExperimentSetup): Relation => Boolean = {
def provenanceFilter(
expSetup: ExperimentSetup,
algorithm: GraphAlgorithm
): Relation => Boolean = {
expSetup match {
case ExperimentSetup.ProvenanceGraphPruning |
ExperimentSetup.CombinedPruning =>
(r: ProvenanceGraph.Relation) => {
r.edge.event match {
case Operation("joinVertices") => true
case _ => false
case Operation("outerJoinVertices") =>
algorithm == GraphAlgorithm.fromString("pr")
case Operation("joinVertices") =>
algorithm != GraphAlgorithm.fromString("pr")
case _ => false
}
}
case _ =>
Expand Down
125 changes: 66 additions & 59 deletions lib/src/test/resources/runner-config-example.conf
Original file line number Diff line number Diff line change
@@ -1,61 +1,68 @@
runner {
# Inputs
repetitions = 1
algorithms = [
BFS
PageRank
WCC
SSSP
]

graphs = [
kgs
wiki-Talk
#cit-Patents
# S graphs
#datagen-7_5-fb
#datagen-7_6-fb
#datagen-7_7-zf
#datagen-7_8-zf
#datagen-7_9-fb
#dota-league
#graph500-22
# M graphs
datagen-8_4-fb
# L graphs
#datagen-8_8-zf
]

storageFormats = [
TextFile()
ObjectFile()
ParquetFile()
AvroFile()
ORCFile()
CSVFile()
JSONFormat()
TextFile(true)
CSVFile(true)
JSONFormat(true)
]

jar = "invalid-path"
datasetPath = "./src/test/resources"
experimentsPath = "/var/scratch/gmo520/thesis/experiments"
setups = [
Baseline
StorageFormats
# Compression
# Storage
# Tracing
# SmartPruning
# AlgorithmOpOnly
# JoinVerticesOpOnly
# Combined
]

# Outputs
lineagePath = "file:///tmp/lineage"
outputPath = "file:///tmp/output"
sparkLogs = "file:///tmp/spark-logs"
// Inputs
repetitions = 1
algorithms = [
"BFS",
"PageRank",
"WCC",
"SSSP",
]

setups = [
"Baseline",
"StorageFormats",
// "Compression",
// "Storage",
// "Tracing",
// "SmartPruning",
// "AlgorithmOpOnly",
// "JoinVerticesOpOnly",
// "Combined",
]

graphs = [
// XS graphs
"kgs",
"wiki-Talk",
// "cit-Patents",

// S graphs
// "datagen-7_5-fb",
// "datagen-7_6-fb",
// "datagen-7_7-zf",
// "datagen-7_8-zf",
// "datagen-7_9-fb",
// "dota-league",
// "graph500-22",

// M graphs
"datagen-8_4-fb",

// L graphs
// "datagen-8_8-zf",
]

storageFormats = [
"TextFile()",
"ObjectFile()",
"ParquetFile()",
"AvroFile()",
"ORCFile()",
"CSVFile()",
"JSONFormat()",
"TextFile(true)",
"CSVFile(true)",
"JSONFormat(true)",
]

jar = "invalid-path"
datasetPath = "./src/test/resources"
experimentsPath = "/var/scratch/gmo520/thesis/experiments"

// Outputs
lineagePath = "file:///tmp/lineage"
outputPath = "file:///tmp/output"
sparkLogs = "file:///tmp/spark-logs"

timeoutMinutes = 10
}
111 changes: 64 additions & 47 deletions lib/src/test/scala/benchmark/BenchmarkTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ import provenance.{ProvenanceGraph, ProvenanceGraphNode}
import provenance.events.{BFS, Operation}
import provenance.metrics.ObservationSet

import lu.magalhaes.gilles.provxlib.provenance.query.{
DeltaPredicate,
GraphPredicate
}
import lu.magalhaes.gilles.provxlib.provenance.storage.TextFile
import lu.magalhaes.gilles.provxlib.utils.LocalSparkSession.withSparkSession
import org.apache.spark.graphx.{Edge, Graph}
Expand Down Expand Up @@ -61,7 +65,8 @@ class BenchmarkTests extends AnyFunSuite {
outputDir = outputDir,
graphalyticsConfigPath = graphalyticsConfigPath,
lineageDir = runnerConfig.runner.lineagePath,
setup = ExperimentSetup.Baseline
setup = ExperimentSetup.Baseline,
numExecutors = 7
)
)
Benchmark.run(sc, config)
Expand All @@ -70,32 +75,24 @@ class BenchmarkTests extends AnyFunSuite {

test("Benchmark flags computation") {
assert(
Benchmark.computeFlags(ExperimentSetup.Compression) == (true, true)
)
assert(
Benchmark.computeFlags(ExperimentSetup.Storage) == (true, true)
Benchmark.computeFlags(ExperimentSetup.CompleteProvenance) == (true, true)
)
assert(
Benchmark.computeFlags(ExperimentSetup.Tracing) == (true, false)
)
assert(
Benchmark.computeFlags(
ExperimentSetup.SmartPruning
) == (true, true)
)
assert(
Benchmark.computeFlags(
ExperimentSetup.AlgorithmOpOnly
ExperimentSetup.DataGraphPruning
) == (true, true)
)
assert(
Benchmark.computeFlags(
ExperimentSetup.JoinVerticesOpOnly
ExperimentSetup.ProvenanceGraphPruning
) == (true, true)
)
assert(
Benchmark.computeFlags(
ExperimentSetup.Combined
ExperimentSetup.CombinedPruning
) == (true, true)
)
assert(
Expand Down Expand Up @@ -131,32 +128,61 @@ class BenchmarkTests extends AnyFunSuite {

val g = Graph(longVertices, edges)

assert(
g.subgraph(vpred =
Benchmark
.dataFilter(ExperimentSetup.SmartPruning, GraphAlgorithm.WCC)
).vertices
.collect()
.length == 1
)
{
val filter = Benchmark.dataFilter(
ExperimentSetup.DataGraphPruning,
GraphAlgorithm.WCC
) match {
case GraphPredicate(nodePredicate, _) => nodePredicate
case DeltaPredicate(_) => ???
case _ => ???
}

assert(
g.subgraph(vpred =
Benchmark.dataFilter(ExperimentSetup.Baseline, GraphAlgorithm.WCC)
).vertices
.collect()
.length == 3
)
assert(
g.subgraph(vpred = filter)
.vertices
.collect()
.length == 1
)
}

{
val filter = Benchmark.dataFilter(
ExperimentSetup.Baseline,
GraphAlgorithm.WCC
) match {
case GraphPredicate(nodePredicate, _) => nodePredicate
case DeltaPredicate(_) => ???
case _ => ???
}
assert(
g.subgraph(vpred = filter)
.vertices
.collect()
.length == 3
)
}

{
val filter = Benchmark
.dataFilter(
ExperimentSetup.DataGraphPruning,
GraphAlgorithm.SSSP
) match {
case DeltaPredicate(_) => ???
case GraphPredicate(nodePredicate, _) => nodePredicate
case _ => ???
}

val g2 = Graph(doubleVertices, edges)
assert(
g2.subgraph(vpred =
Benchmark
.dataFilter(ExperimentSetup.SmartPruning, GraphAlgorithm.SSSP)
).vertices
.collect()
.length == 1
)
val g2 = Graph(doubleVertices, edges)
assert(
g2.subgraph(vpred = filter)
.vertices
.collect()
.length == 1
)

}
}
}

Expand Down Expand Up @@ -185,17 +211,8 @@ class BenchmarkTests extends AnyFunSuite {
ProvenanceGraph.Edge(BFS(3), ObservationSet())
)

val algOpFilter =
Benchmark.provenanceFilter(ExperimentSetup.AlgorithmOpOnly)

val res = pg.filter(nodeP = ProvenanceGraph.allNodes, edgeP = algOpFilter)

assert(res.graph.edges.count((e: ProvenanceGraph.Type#EdgeT) => {
algOpFilter(e.outer)
}) == 1)

val joinVerticesFilter =
Benchmark.provenanceFilter(ExperimentSetup.JoinVerticesOpOnly)
Benchmark.provenanceFilter(ExperimentSetup.ProvenanceGraphPruning)

val res2 =
pg.filter(nodeP = ProvenanceGraph.allNodes, edgeP = joinVerticesFilter)
Expand Down
3 changes: 2 additions & 1 deletion lib/src/test/scala/benchmark/ConfigFilesTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ class ConfigFilesTest extends AnyFunSuite {
outputDir = outputDir,
graphalyticsConfigPath = graphalyticsConfigPath,
lineageDir = runnerConfig.runner.lineagePath,
setup = ExperimentSetup.Baseline
setup = ExperimentSetup.Baseline,
numExecutors = 7
)

println(BenchmarkAppConfig.write(config))
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
5 changes: 0 additions & 5 deletions results/plots/das6/20240521-022009-tracing/desc.csv

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed results/plots/das6/conclusion/factor.pdf
Binary file not shown.
22 changes: 22 additions & 0 deletions results/plots/das6/final/csv/es01-duration.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
algorithm,dataset,min,mean,max,std
BFS,graph500-22,30.44,32.87,35.11,2.00
BFS,datagen-7\_5-fb,33.20,34.32,36.05,1.20
BFS,datagen-7\_9-fb,62.32,69.31,80.59,8.00
BFS,cit-Patents,79.37,82.97,88.44,4.02
BFS,datagen-8\_8-zf,184.12,218.72,247.36,26.50
BFS,datagen-8\_4-fb,224.23,241.79,251.48,9.34
PageRank,datagen-7\_5-fb,38.60,39.98,43.88,2.02
PageRank,datagen-7\_9-fb,67.77,69.88,71.44,1.65
PageRank,graph500-22,75.87,78.38,81.21,2.04
PageRank,cit-Patents,76.20,85.10,88.41,4.45
PageRank,datagen-8\_4-fb,205.04,215.87,227.36,7.12
PageRank,datagen-8\_8-zf,223.81,245.95,258.35,11.81
SSSP,datagen-7\_5-fb,34.57,38.12,45.16,3.77
SSSP,datagen-7\_9-fb,60.92,76.50,94.05,14.17
SSSP,datagen-8\_8-zf,162.00,209.25,248.77,30.70
SSSP,datagen-8\_4-fb,234.95,255.83,264.24,11.53
WCC,datagen-7\_5-fb,33.54,36.77,38.80,1.94
WCC,datagen-7\_9-fb,62.89,66.34,72.11,3.28
WCC,graph500-22,66.38,72.05,82.57,7.97
WCC,cit-Patents,152.93,157.94,165.29,4.65
WCC,datagen-8\_4-fb,230.89,239.02,243.93,5.38
7 changes: 7 additions & 0 deletions results/plots/das6/final/csv/es01-size.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
dataset,size
cit-Patents,280 MB
datagen-7\_5-fb,1014 MB
datagen-7\_9-fb,2 GB
datagen-8\_4-fb,7 GB
datagen-8\_8-zf,13 GB
graph500-22,963 MB
Loading

0 comments on commit 6eeb7a2

Please sign in to comment.