Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add basic sp selection score modelling #298

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions pkg/session/model/cmd/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package main

import (
"fmt"
"math/rand"
"os"
"strconv"
"time"

"github.com/dustin/go-humanize"
"github.com/filecoin-project/lassie/pkg/session"
"github.com/filecoin-project/lassie/pkg/session/model"
"github.com/multiformats/go-multicodec"
)

// Provider profiles available to the simulation. Each profile configures a
// single transport protocol with the chance of being a candidate, the chance
// of a successful retrieval, and StdDev/Mean distributions for connect time,
// time to first byte, bandwidth (bytes per second) and latency.
var (
// GRAPHSYNC_FAST_RELIABLE_LOTS_OF_POPULAR_DATA is the fastest graphsync
// profile: 10ms mean connect time and TTFB, 1e8 bytes/s mean bandwidth.
// NOTE(review): the identifier says RELIABLE while Name says
// "semi-reliable" (Success is 0.6) -- confirm which wording is intended.
GRAPHSYNC_FAST_RELIABLE_LOTS_OF_POPULAR_DATA = model.Provider{
Name: "graphsync fast, semi-reliable, lots of popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.5),
Success: model.Chance(0.6),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 10},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 10},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e8}, // Mean of 100MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 1, Mean: 20},
FastRetrieval: model.Chance(0.9),
Verified: model.Chance(0.9),
},
},
}

// GRAPHSYNC_MEDIUM_RELIABLE_SOME_POPULAR_DATA is a mid-speed graphsync
// profile that is a candidate for 30% of retrievals and succeeds half of
// the time.
GRAPHSYNC_MEDIUM_RELIABLE_SOME_POPULAR_DATA = model.Provider{
Name: "graphsync medium, semi-reliable, some popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.3),
Success: model.Chance(0.5),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 50},
TimeToFirstByteMs: model.ProbDist{StdDev: 10, Mean: 25},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e7}, // Mean of 10MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
FastRetrieval: model.Chance(0.9),
Verified: model.Chance(0.9),
},
},
}

// GRAPHSYNC_MEDIUM_RELIABLE_MINIMAL_POPULAR_DATA has the same timing,
// bandwidth and success values as the "some popular data" profile above but
// is a candidate for only 10% of retrievals.
GRAPHSYNC_MEDIUM_RELIABLE_MINIMAL_POPULAR_DATA = model.Provider{
Name: "graphsync medium, semi-reliable, minimal popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.1),
Success: model.Chance(0.5),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 50},
TimeToFirstByteMs: model.ProbDist{StdDev: 10, Mean: 25},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e7}, // Mean of 10MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
FastRetrieval: model.Chance(0.9),
Verified: model.Chance(0.9),
},
},
}

// GRAPHSYNC_MEDIUM_UNRELIABLE_SOME_POPULAR_DATA succeeds 30% of the time,
// has a slower TTFB and a tenth of the bandwidth of the semi-reliable
// medium profiles, and advertises FastRetrieval/Verified only half the time.
GRAPHSYNC_MEDIUM_UNRELIABLE_SOME_POPULAR_DATA = model.Provider{
Name: "graphsync medium, unreliable, some popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.3),
Success: model.Chance(0.3),
ConnectTimeMs: model.ProbDist{StdDev: 6, Mean: 50},
TimeToFirstByteMs: model.ProbDist{StdDev: 20, Mean: 50},
BandwidthBps: model.ProbDist{StdDev: 1e5, Mean: 1e6}, // Mean of 1MB/s +/- 100KB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
FastRetrieval: model.Chance(0.5),
Verified: model.Chance(0.5),
},
},
}

// GRAPHSYNC_MEDIUM_VERY_UNRELIABLE_SOME_POPULAR_DATA succeeds only 10% of
// the time and has the slowest connect time, TTFB and latency of the
// graphsync profiles.
// NOTE(review): this profile is declared but not added to the population
// built in main -- confirm whether that is deliberate.
GRAPHSYNC_MEDIUM_VERY_UNRELIABLE_SOME_POPULAR_DATA = model.Provider{
Name: "graphsync medium, very unreliable, some popular data",
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportGraphsyncFilecoinv1: {
Candidate: model.Chance(0.3),
Success: model.Chance(0.1),
ConnectTimeMs: model.ProbDist{StdDev: 100, Mean: 200},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 100},
BandwidthBps: model.ProbDist{StdDev: 1e5, Mean: 1e6}, // Mean of 1MB/s +/- 100KB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 100},
FastRetrieval: model.Chance(0.2),
Verified: model.Chance(0.2),
},
},
}

// HTTP_FAST_SEMIRELIABLE_LOTS_OF_POPULAR_DATA is an HTTP gateway profile
// with zero connect time and 1e8 bytes/s mean bandwidth. FastRetrieval and
// Verified are not set, so they keep their zero value.
HTTP_FAST_SEMIRELIABLE_LOTS_OF_POPULAR_DATA = model.Provider{
Name: "http fast, semi-reliable, lots of popular data", // e-ipfs?
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportIpfsGatewayHttp: {
Candidate: model.Chance(0.5),
Success: model.Chance(0.5),
ConnectTimeMs: model.ProbDist{StdDev: 0, Mean: 0},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 10},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e8}, // Mean of 100MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 1, Mean: 20},
},
},
}

// HTTP_MEDIUM_FLAKY_SOME_POPULAR_DATA is an HTTP gateway profile with zero
// connect time and 1e7 bytes/s mean bandwidth. FastRetrieval and Verified
// are not set, so they keep their zero value.
// NOTE(review): Name reads "semi-reliable, lots of popular data" while the
// identifier says FLAKY / SOME_POPULAR_DATA, and Candidate/Success are
// higher than in the "fast" HTTP profile -- looks like a copy-paste
// leftover, confirm the intended values and label.
// NOTE(review): this profile is declared but not added to the population
// built in main -- confirm whether that is deliberate.
HTTP_MEDIUM_FLAKY_SOME_POPULAR_DATA = model.Provider{
Name: "http medium, semi-reliable, lots of popular data", // e-ipfs?
Probabilities: map[multicodec.Code]model.Probabilities{
multicodec.TransportIpfsGatewayHttp: {
Candidate: model.Chance(0.7),
Success: model.Chance(0.6),
ConnectTimeMs: model.ProbDist{StdDev: 0, Mean: 0},
TimeToFirstByteMs: model.ProbDist{StdDev: 6, Mean: 10},
BandwidthBps: model.ProbDist{StdDev: 1e6, Mean: 1e7}, // Mean of 10MB/s +/- 1MB/s
LatencyMs: model.ProbDist{StdDev: 10, Mean: 40},
},
},
}
)

// main builds a population of provider profiles, runs the selection
// simulation against a session and prints a summary report to stdout.
//
// Usage: go run main.go [seed]
//
// The optional seed must be a base-10 integer that fits in an int64. When it
// is omitted the current time in nanoseconds is used. The seed is echoed in
// the report so that a run can be repeated with the same random sequence. Any
// other argument shape prints the usage line to stderr and exits with
// status 1.
func main() {
	const usage = "Usage: go run main.go [seed]"

	seed := time.Now().UnixNano()
	switch len(os.Args) {
	case 1:
		// No seed supplied: keep the time-based default.
	case 2:
		// The single argument is only accepted when it parses as a number.
		parsed, err := strconv.ParseInt(os.Args[1], 10, 64)
		if err != nil {
			fmt.Fprintln(os.Stderr, usage)
			os.Exit(1)
		}
		seed = parsed
	default:
		fmt.Fprintln(os.Stderr, usage)
		os.Exit(1)
	}

	// One random source is shared by the simulation and the session config so
	// that the whole run is driven by the single seed.
	simRand := rand.New(rand.NewSource(seed))

	// TODO: generate static population up-front with fixed characteristics
	pop := &model.Population{}
	pop.Add(GRAPHSYNC_FAST_RELIABLE_LOTS_OF_POPULAR_DATA, 4)
	pop.Add(GRAPHSYNC_MEDIUM_RELIABLE_SOME_POPULAR_DATA, 20)
	pop.Add(GRAPHSYNC_MEDIUM_UNRELIABLE_SOME_POPULAR_DATA, 20)
	pop.Add(GRAPHSYNC_MEDIUM_RELIABLE_MINIMAL_POPULAR_DATA, 50)
	pop.Add(HTTP_FAST_SEMIRELIABLE_LOTS_OF_POPULAR_DATA, 1)

	sim := model.Simulation{
		Population: pop,
		Retrievals: 50000,
		// StdDev and Mean were previously swapped relative to the comment,
		// giving a 10MB mean with a 20MB deviation.
		RetrievalSize:   model.ProbDist{StdDev: 1e7, Mean: 2e7}, // Mean of 20MB +/- 10MB
		HttpChance:      model.Chance(0.5),
		GraphsyncChance: model.Chance(0.5),
	}

	// ret is the collection produced by the simulation (its length is reported
	// as the number of retrievals); res holds the totals from running it
	// against the session.
	ret := sim.Run(simRand)
	cfg := session.DefaultConfig()
	cfg.Random = simRand
	ses := session.NewSession(cfg, true)
	res := ret.RunWith(simRand, ses)

	// Overall throughput in bytes per second. Computed in floating point and
	// guarded: dividing by the truncated whole-second count panics with an
	// integer divide by zero whenever the total time is under one second.
	var totalBandwidth uint64
	if totalMs := float64(res.TotalTimeMs); totalMs > 0 {
		totalBandwidth = uint64(float64(res.Size) * 1000 / totalMs)
	}

	fmt.Println("---------------------------------------------------------------")
	fmt.Println("Simulation of", len(ret), "retrievals, seed:", seed)
	fmt.Println()
	fmt.Printf("\t Size per retrieval: %s < %s < %s\n", humanize.IBytes(uint64(ret.MinSize())), humanize.IBytes(uint64(ret.AvgSize())), humanize.IBytes(uint64(ret.MaxSize())))
	fmt.Printf("\tCandidate per retrieval: %s < %s < %s\n", humanize.Comma(int64(ret.MinCandidateCount())), humanize.Comma(int64(ret.AvgCandidateCount())), humanize.Comma(int64(ret.MaxCandidateCount())))
	fmt.Println("---------------------------------------------------------------")
	fmt.Printf("\t Runs: %d\n", res.Runs)
	fmt.Printf("\t Successes: %d\n", res.Successes)
	fmt.Printf("\t Retrieval failures: %d\n", res.RetrievalFailures)
	fmt.Printf("\t Size: %s\n", humanize.IBytes(uint64(res.Size)))
	fmt.Printf("\t Total time: %v\n", time.Duration(res.TotalTimeMs)*time.Millisecond)
	fmt.Printf("\t Average TTFB: %s\n", time.Duration(res.AverageTimeToFirstByteMs)*time.Millisecond)
	fmt.Printf("\t Average bandwidth: %s/s\n", humanize.IBytes(uint64(res.AverageBandwidth)))
	fmt.Printf("\t Total bandwidth: %s/s\n", humanize.IBytes(totalBandwidth))
	fmt.Println("---------------------------------------------------------------")
}
24 changes: 24 additions & 0 deletions pkg/session/model/pop.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package model

import "github.com/multiformats/go-multicodec"

// Provider describes one provider profile in the model: a label plus the
// probabilistic behaviour it exhibits for each protocol it is configured for.
type Provider struct {
// Name is a human-readable label for the profile.
Name string
// Probabilities maps a multicodec code to the behaviour of this provider
// for that code.
Probabilities map[multicodec.Code]Probabilities
}

// Population is an ordered list of provider profiles, each paired with a
// count. The zero value is an empty population that is ready to use.
type Population struct {
	// Providers holds one entry per Add call, in call order.
	Providers []PC
}

// PC pairs a Provider profile with the count it was registered with.
type PC struct {
	// Provider is the registered profile.
	Provider Provider
	// Count is the count given to Population.Add for this profile.
	Count int
}

// Add registers provider with the given count by appending a new entry to
// p.Providers. Earlier entries are left untouched, and adding the same
// profile twice produces two entries.
func (p *Population) Add(provider Provider, count int) {
	entry := PC{Count: count, Provider: provider}
	// append allocates the backing array when Providers is still nil, so no
	// explicit initialisation is required.
	p.Providers = append(p.Providers, entry)
}
45 changes: 45 additions & 0 deletions pkg/session/model/prob.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package model

import "math/rand"

// Probabilities defines the probabilistic behaviour of a provider for a
// particular protocol. Chance fields are probabilities in the range [0,1];
// ProbDist fields are distributions given as a standard deviation and a mean.
type Probabilities struct {
// Candidate is the probability of being a candidate for any given
// retrieval [0,1].
Candidate Chance
// Success is the probability of a successful retrieval [0,1].
Success Chance
// ConnectTimeMs is the distribution of connect time in milliseconds.
ConnectTimeMs ProbDist
// TimeToFirstByteMs is the distribution of time to first byte in
// milliseconds.
TimeToFirstByteMs ProbDist
// BandwidthBps is the distribution of bandwidth in bytes per second. This
// has to account for block fetching speed on the remote, not just the pipe.
BandwidthBps ProbDist
// LatencyMs is the distribution of latency in milliseconds. This will be
// multiplied to simulate connection initialisation round-trips.
LatencyMs ProbDist
// FastRetrieval is the probability of having FastRetrieval for a graphsync
// retrieval [0,1].
FastRetrieval Chance
// Verified is the probability of having Verified for a graphsync
// retrieval [0,1].
Verified Chance
}

// ProbDist describes a normal distribution by its standard deviation and
// mean. The zero value always samples to 0.
type ProbDist struct {
	// StdDev is the standard deviation of the distribution.
	StdDev float64
	// Mean is the centre of the distribution.
	Mean float64
}

// Sample draws a single normally distributed value from pd, taking its
// randomness from rand. The result is not clamped, so it can be negative when
// StdDev is large relative to Mean.
func (pd ProbDist) Sample(rand *rand.Rand) float64 {
	// z is a standard normal variate (mean 0, standard deviation 1) that is
	// scaled and shifted into this distribution.
	z := rand.NormFloat64()
	return z*pd.StdDev + pd.Mean
}

// Chance is the probability that Roll reports true. Values are expected to lie
// in the range [0,1]: the closer to 1, the more often Roll succeeds.
type Chance float64

// Roll draws one value in [0,1) from rand and reports whether it falls below
// c. A Chance of 0 therefore never succeeds and a Chance of 1 always does.
func (c Chance) Roll(rand *rand.Rand) bool {
	threshold := float64(c)
	return threshold > rand.Float64()
}

// FIFTY_FIFTY is an even chance: Roll is as likely to report true as false.
const FIFTY_FIFTY = Chance(0.5)
Loading