Skip to content

Commit

Permalink
Improve resolve metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
anbsky committed Jul 24, 2024
1 parent 64217c0 commit fe34ae4
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 17 deletions.
33 changes: 28 additions & 5 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ const (

StreamOriginal = "original"
StreamTranscoded = "transcoded"

ResolveSource = "source"
ResolveKind = "kind"

ResolveSourceCache = "cache"
ResolveSourceOApi = "oapi"
ResolveFailureGeneral = "general"
ResolveFailureClaimNotFound = "claim_not_found"
)

var (
Expand Down Expand Up @@ -85,18 +93,33 @@ var (
Name: "evictions_total",
Help: "Total number of items evicted from the cache",
})
ResolveFailures = promauto.NewCounter(prometheus.CounterOpts{

ResolveFailures = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: "resolve",
Name: "failures",
Help: "Total number of failed SDK resolves",
})
ResolveSuccesses = promauto.NewCounter(prometheus.CounterOpts{
}, []string{ResolveSource, ResolveKind})
ResolveFailuresDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: "resolve",
Name: "failures_duration",
Help: "Failed resolves durations",
}, []string{ResolveSource, ResolveKind})

ResolveSuccesses = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: "resolve",
Name: "successes",
Help: "Total number of succeeded SDK resolves",
})
Help: "Total number of succeeded resolves",
}, []string{ResolveSource})
ResolveSuccessesDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: "resolve",
Name: "successes_duration",
Help: "Successful resolves durations",
}, []string{ResolveSource})

ResolveTimeMS = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: ns,
Subsystem: "resolve",
Expand Down
2 changes: 0 additions & 2 deletions player/http_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ func (h *RequestHandler) Handle(c *gin.Context) {
stream, err := h.player.ResolveStream(uri)
addBreadcrumb(c.Request, "sdk", fmt.Sprintf("resolve %v", uri))
if err != nil {
metrics.ResolveFailures.Inc()
processStreamError("resolve", uri, c.Writer, c.Request, err)
return
}
Expand Down Expand Up @@ -339,7 +338,6 @@ func (h *RequestHandler) HandleTranscodedFragment(c *gin.Context) {
stream, err := h.player.ResolveStream(uri)
addBreadcrumb(c.Request, "sdk", fmt.Sprintf("resolve %v", uri))
if err != nil {
metrics.ResolveFailures.Inc()
processStreamError("resolve", uri, c.Writer, c.Request, err)
return
}
Expand Down
55 changes: 45 additions & 10 deletions player/player.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@ package player
import (
"encoding/hex"
"errors"
"math/rand"
"regexp"
"strings"
"time"

"github.com/OdyseeTeam/player-server/internal/metrics"
"github.com/OdyseeTeam/player-server/pkg/logger"
"github.com/OdyseeTeam/player-server/pkg/paid"
"github.com/prometheus/client_golang/prometheus"

tclient "github.com/OdyseeTeam/transcoder/client"
ljsonrpc "github.com/lbryio/lbry.go/v2/extras/jsonrpc"
Expand All @@ -20,8 +20,9 @@ import (
)

const (
edgeTokenHeader = "Authorization"
edgeTokenPrefix = "Token "
edgeTokenHeader = "Authorization"
edgeTokenPrefix = "Token "
resolveCacheDuration = 5 * time.Minute
)

var (
Expand Down Expand Up @@ -107,16 +108,15 @@ func (p *Player) Play(s *Stream, c *gin.Context) error {

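// ResolveStream resolves the provided claim ID or URI, using the cached claim from resolveCache when present and falling back to a remote lookup via p.resolve on a cache miss.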
func (p *Player) ResolveStream(claimId string) (*Stream, error) {
start := time.Now()
defer func(t time.Time) {
metrics.ResolveTimeMS.Observe(float64(time.Since(t).Milliseconds()))
}(time.Now())
}(start)

var claim *ljsonrpc.Claim

cachedClaim, err := p.resolveCache.Get(claimId)
if err == nil {
claim = cachedClaim.(*ljsonrpc.Claim)
} else {
cachedClaim, cErr := p.resolveCache.Get(claimId)
if cErr != nil {
var err error
claim, err = p.resolve(claimId)
if err != nil {
Expand All @@ -142,8 +142,12 @@ func (p *Player) ResolveStream(claimId string) (*Stream, error) {
return nil, err
}
}
metrics.ResolveSuccesses.Inc()
_ = p.resolveCache.SetWithExpire(claimId, claim, time.Duration(rand.Intn(5)+5)*time.Minute) // random time between 5 and 10 min, to spread load on wallet servers
metrics.ResolveSuccesses.WithLabelValues(metrics.ResolveSourceOApi).Inc()
_ = p.resolveCache.SetWithExpire(claimId, claim, resolveCacheDuration)
} else {
metrics.ResolveSuccessesDuration.WithLabelValues(metrics.ResolveSourceCache).Observe(float64(time.Since(start)))
metrics.ResolveSuccesses.WithLabelValues(metrics.ResolveSourceCache).Inc()
claim = cachedClaim.(*ljsonrpc.Claim)
}

if claim.Value.GetStream() == nil {
Expand All @@ -158,24 +162,55 @@ func (p *Player) ResolveStream(claimId string) (*Stream, error) {

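// resolve looks up a claim remotely: via Resolve when claimID does not match reClaim, otherwise via ClaimSearch.
// Each failure path increments ResolveFailures and records a ResolveFailuresDuration observation.
// NOTE(review): durations are observed as float64(time.Since(start)), i.e. nanoseconds — confirm the histogram buckets expect that unit rather than seconds.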
func (p *Player) resolve(claimID string) (*ljsonrpc.Claim, error) {
generalFailureLabels := prometheus.Labels{
metrics.ResolveSource: metrics.ResolveSourceOApi,
metrics.ResolveKind: metrics.ResolveFailureGeneral,
}
notFoundFailureLabels := prometheus.Labels{
metrics.ResolveSource: metrics.ResolveSourceOApi,
metrics.ResolveKind: metrics.ResolveFailureClaimNotFound,
}

start := time.Now()

// TODO: Get rid of the resolve call when ClaimSearchArgs acquires URI param
if !reClaim.MatchString(claimID) {
resolved, err := p.lbrynetClient.Resolve(claimID)
if err != nil {
metrics.ResolveFailuresDuration.With(generalFailureLabels).Observe(float64(time.Since(start)))
metrics.ResolveFailures.With(generalFailureLabels).Inc()
return nil, err
}

claim := (*resolved)[claimID]
if claim.CanonicalURL == "" {
metrics.ResolveFailuresDuration.With(notFoundFailureLabels).Observe(float64(time.Since(start)))
metrics.ResolveFailures.With(notFoundFailureLabels).Inc()
return nil, ErrClaimNotFound
}
return &claim, nil
}
resp, err := p.lbrynetClient.ClaimSearch(ljsonrpc.ClaimSearchArgs{ClaimID: &claimID, PageSize: 1, Page: 1})
if err != nil {
metrics.ResolveFailuresDuration.With(prometheus.Labels{
metrics.ResolveSource: metrics.ResolveSourceOApi,
metrics.ResolveKind: metrics.ResolveFailureGeneral,
}).Observe(float64(time.Since(start)))
metrics.ResolveFailures.With(prometheus.Labels{
metrics.ResolveSource: metrics.ResolveSourceOApi,
metrics.ResolveKind: metrics.ResolveFailureGeneral,
}).Inc()
return nil, err
}
if len(resp.Claims) == 0 {
metrics.ResolveFailuresDuration.With(prometheus.Labels{
metrics.ResolveSource: metrics.ResolveSourceOApi,
metrics.ResolveKind: metrics.ResolveFailureClaimNotFound,
}).Observe(float64(time.Since(start)))
metrics.ResolveFailures.With(prometheus.Labels{
metrics.ResolveSource: metrics.ResolveSourceOApi,
metrics.ResolveKind: metrics.ResolveFailureClaimNotFound,
}).Inc()
return nil, ErrClaimNotFound
}
return &resp.Claims[0], nil
Expand Down

0 comments on commit fe34ae4

Please sign in to comment.