Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Pusher Engine with updated interface #6780

Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions engine/collection/epochmgr/factories/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ import (
"github.com/dgraph-io/badger/v2"
"github.com/rs/zerolog"

"github.com/onflow/flow-go/engine/collection"
"github.com/onflow/flow-go/module"
builder "github.com/onflow/flow-go/module/builder/collection"
finalizer "github.com/onflow/flow-go/module/finalizer/collection"
"github.com/onflow/flow-go/module/mempool"
"github.com/onflow/flow-go/network"
clusterstate "github.com/onflow/flow-go/state/cluster"
"github.com/onflow/flow-go/state/protocol"
"github.com/onflow/flow-go/storage"
Expand All @@ -23,7 +23,7 @@ type BuilderFactory struct {
trace module.Tracer
opts []builder.Opt
metrics module.CollectionMetrics
pusher network.Engine // engine for pushing finalized collection to consensus committee
pusher collection.GuaranteedCollectionPublisher // engine for pushing finalized collection to consensus committee
log zerolog.Logger
}

Expand All @@ -33,7 +33,7 @@ func NewBuilderFactory(
mainChainHeaders storage.Headers,
trace module.Tracer,
metrics module.CollectionMetrics,
pusher network.Engine,
pusher collection.GuaranteedCollectionPublisher,
log zerolog.Logger,
opts ...builder.Opt,
) (*BuilderFactory, error) {
Expand Down
15 changes: 15 additions & 0 deletions engine/collection/guaranteed_collection_publisher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package collection

import (
"github.com/onflow/flow-go/model/flow"
)

// GuaranteedCollectionPublisher defines the interface to send collection guarantees
// from a collection node to consensus nodes. Collection guarantees are broadcast on a best-effort basis,
// and it is acceptable to discard some guarantees (especially those that are out of date).
// Implementations must be non-blocking and safe for concurrent use.
type GuaranteedCollectionPublisher interface {
// SubmitCollectionGuarantee adds a guarantee to an internal queue
// to be published to consensus nodes. The method has no return value, so the
// caller receives no indication of whether the guarantee was queued or discarded.
SubmitCollectionGuarantee(guarantee *flow.CollectionGuarantee)
}
32 changes: 32 additions & 0 deletions engine/collection/mock/guaranteed_collection_publisher.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 11 additions & 39 deletions engine/collection/pusher/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/onflow/flow-go/engine/common/fifoqueue"
"github.com/onflow/flow-go/model/flow"
"github.com/onflow/flow-go/model/flow/filter"
"github.com/onflow/flow-go/model/messages"
"github.com/onflow/flow-go/module"
"github.com/onflow/flow-go/module/component"
"github.com/onflow/flow-go/module/irrecoverable"
Expand Down Expand Up @@ -44,8 +43,7 @@ type Engine struct {
cm *component.ComponentManager
}

// TODO convert to network.MessageProcessor
var _ network.Engine = (*Engine)(nil)
var _ network.MessageProcessor = (*Engine)(nil)
var _ component.Component = (*Engine)(nil)

// New creates a new pusher engine.
Expand Down Expand Up @@ -120,17 +118,17 @@ func (e *Engine) outboundQueueWorker(ctx irrecoverable.SignalerContext, ready co
// No errors expected during normal operations.
func (e *Engine) processOutboundMessages(ctx context.Context) error {
for {
nextMessage, ok := e.queue.Pop()
item, ok := e.queue.Pop()
if !ok {
return nil
}

asSCGMsg, ok := nextMessage.(*messages.SubmitCollectionGuarantee)
guarantee, ok := item.(*flow.CollectionGuarantee)
if !ok {
return fmt.Errorf("invalid message type in pusher engine queue")
return fmt.Errorf("invalid type in pusher engine queue")
}

err := e.publishCollectionGuarantee(&asSCGMsg.Guarantee)
err := e.publishCollectionGuarantee(guarantee)
if err != nil {
return err
}
Expand All @@ -143,44 +141,18 @@ func (e *Engine) processOutboundMessages(ctx context.Context) error {
}
}

// SubmitLocal submits an event originating on the local node.
func (e *Engine) SubmitLocal(event interface{}) {
ev, ok := event.(*messages.SubmitCollectionGuarantee)
if ok {
e.SubmitCollectionGuarantee(ev)
} else {
engine.LogError(e.log, fmt.Errorf("invalid message argument to pusher engine"))
}
}

// Submit submits the given event from the node with the given origin ID
// for processing in a non-blocking manner. It returns instantly and logs
// a potential processing error internally when done.
func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) {
engine.LogError(e.log, fmt.Errorf("pusher engine should only receive local messages on the same node"))
}

// ProcessLocal processes an event originating on the local node.
func (e *Engine) ProcessLocal(event interface{}) error {
ev, ok := event.(*messages.SubmitCollectionGuarantee)
if ok {
e.SubmitCollectionGuarantee(ev)
return nil
} else {
return fmt.Errorf("invalid message argument to pusher engine")
}
}

// Process processes the given event from the node with the given origin ID in
// a blocking manner. It returns the potential processing error when done.
// a non-blocking manner. It returns the potential processing error when done.
// Because the pusher engine does not accept inputs from the network,
// always drop any messages and return an error.
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, message any) error {
return fmt.Errorf("pusher engine should only receive local messages on the same node")
return fmt.Errorf("pusher engine should only receive local messages on the same node: got message %T on channel %v from origin %v", message, channel, originID)
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This suggestion is largely about mildly adjusting the framing of Process to align with the protocol. Jordan had a helpful explanation in his comment on the prior PR, which might be a useful reference here.

Here’s how I’d frame the context:

  • The Flow protocol requires that honest Collector nodes broadcast Collection guarantees to Consensus Nodes (and only those). "Broadcasting" here refers to epidemic gossip or similar algorithms used to disseminate messages efficiently while minimizing bandwidth usage. You don’t need to dive into the details for this PR, but I thought a quick note on terminology might be helpful. The relevant broadcasting operation is this:
    err = e.conduit.Publish(guarantee, consensusNodes.NodeIDs()...)
  • Now, consider the case where an honest Collector node receives a message from another node—let’s call it "Byzantine Bob." Bob, being byzantine, might deviate from protocol rules when broadcasting messages. While the protocol specifies that Collection Guarantees should be sent to the consensus committee, Bob might specify a different set of recipients for its message.
  • From the perspective of the pusher.Engine (only running within honest Collector nodes), we would receive such messages via the channels.PushGuarantees networking channel. It’s important to note that the networking layer operates as a low-level tool and doesn’t understand the higher-level protocol logic. Its role is simply to relay messages, even if they’re outside the protocol rules (the networking layer performs only very basic checks, as briefly explained in this Notion doc).

However, there’s an opportunity to refine this behavior. Returning an error here informs the networking layer of an issue, but since the networking layer doesn’t understand the meaning of those errors, it’s essentially acting as a "glorified logger." Instead, I think it would be cleaner and more maintainable for the pusher.Engine, which has the detailed protocol knowledge, to handle this edge case directly. For example:

  • We could use the dedicated logging keys for flagging suspected protocol violations:
    // KeySuspicious is a logging label that is used to flag the log event as suspicious behavior
    // This is used to add an easily searchable label to the log event
    KeySuspicious = "suspicious"
    These keys make it easier to identify and analyze specific issues in the logs.
  • Additionally, we can add an explanatory log entry here to provide context for the behavior without needing to involve the networking layer.
Suggested change
// Process processes the given event from the node with the given origin ID in
// a blocking manner. It returns the potential processing error when done.
// a non-blocking manner. It returns the potential processing error when done.
// Because the pusher engine does not accept inputs from the network,
// always drop any messages and return an error.
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, message any) error {
return fmt.Errorf("pusher engine should only receive local messages on the same node")
return fmt.Errorf("pusher engine should only receive local messages on the same node: got message %T on channel %v from origin %v", message, channel, originID)
}
// Process is called by the networking layer when peers broadcast messages with this node
// as one of the recipients. The protocol specifies that Collector nodes broadcast Collection
// Guarantees to Consensus Nodes and _only_ those. When the pusher engine (running only on
// Collectors) receives a message, this message is evidence of byzantine behavior.
// Byzantine inputs are handled internally by the pusher.Engine and do *not* result in
// error returns. No errors expected during normal operation (including byzantine inputs).
func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, message any) error {
// Targeting a collector node's pusher.Engine with messages could be considered a slashable offense.
// However, to generate cryptographic evidence we need Message Forensics - see reference [1].
// Much further into the future, when we are implementing slashing challenges, we'll probably implement a
// dedicated consumer to post-process evidence of protocol violations into slashing challenges. For now,
// we just log this with the `KeySuspicious` label to alert the node operator.
// [1] Message Forensics FLIP: https://github.com/onflow/flips/pull/195
errs := fmt.Errorf("collector node's pusher.Engine was targeted by message %T on channel %v", message, channel)
e.log.Warn().
Err(errs).
Bool(logging.KeySuspicious, true).
Str("peer_id", originID.String()).
Msg("potentially byzantine networking traffic detected")
// The message is dropped after logging: nothing is queued here, and nil is
// returned so the networking layer is not handed an error it cannot act on.
return nil
}


// SubmitCollectionGuarantee adds a collection guarantee to the engine's queue
// to later be published to consensus nodes.
func (e *Engine) SubmitCollectionGuarantee(msg *messages.SubmitCollectionGuarantee) {
if e.queue.Push(msg) {
func (e *Engine) SubmitCollectionGuarantee(guarantee *flow.CollectionGuarantee) {
if e.queue.Push(guarantee) {
e.notifier.Notify()
} else {
e.engMetrics.OutboundMessageDropped(metrics.EngineCollectionProvider, metrics.MessageCollectionGuarantee)
Expand Down
12 changes: 2 additions & 10 deletions engine/collection/pusher/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
"github.com/onflow/flow-go/engine/collection/pusher"
"github.com/onflow/flow-go/model/flow"
"github.com/onflow/flow-go/model/flow/filter"
"github.com/onflow/flow-go/model/messages"
"github.com/onflow/flow-go/module/irrecoverable"
"github.com/onflow/flow-go/module/metrics"
module "github.com/onflow/flow-go/module/mock"
Expand Down Expand Up @@ -97,11 +96,7 @@ func (suite *Suite) TestSubmitCollectionGuarantee() {
suite.conduit.On("Publish", guarantee, consensus[0].NodeID).
Run(func(_ mock.Arguments) { close(done) }).Return(nil).Once()

msg := &messages.SubmitCollectionGuarantee{
Guarantee: *guarantee,
}
err := suite.engine.ProcessLocal(msg)
suite.Require().Nil(err)
suite.engine.SubmitCollectionGuarantee(guarantee)

unittest.RequireCloseBefore(suite.T(), done, time.Second, "message not sent")

Expand All @@ -116,10 +111,7 @@ func (suite *Suite) TestSubmitCollectionGuaranteeNonLocal() {
// send from a non-allowed role
sender := suite.identities.Filter(filter.HasRole[flow.Identity](flow.RoleVerification))[0]

msg := &messages.SubmitCollectionGuarantee{
Guarantee: *guarantee,
}
err := suite.engine.Process(channels.PushGuarantees, sender.NodeID, msg)
err := suite.engine.Process(channels.PushGuarantees, sender.NodeID, guarantee)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, with the change I suggested above, we would hide the case where pusher.Engine rejects input arriving through the external-facing MessageProcessor interface:

// TODO: This function should not return an error.
// The networking layer's responsibility is fulfilled once it delivers a message to an engine.
// It does not possess the context required to handle errors that may arise during an engine's processing
// of the message, as error handling for message processing falls outside the domain of the networking layer.
// Consequently, it is reasonable to remove the error from the Process function's signature,
// since returning an error to the networking layer would not be useful in this context.
Process(channel channels.Channel, originID flow.Identifier, message interface{}) error

We could try to adjust the test so that it verifies that pusher.Engine handles any input passed to Process and does not broadcast anything when Process is called (?) ... maybe something like 👇?

Suggested change
// send from a non-allowed role
sender := suite.identities.Filter(filter.HasRole[flow.Identity](flow.RoleVerification))[0]
msg := &messages.SubmitCollectionGuarantee{
Guarantee: *guarantee,
}
err := suite.engine.Process(channels.PushGuarantees, sender.NodeID, msg)
err := suite.engine.Process(channels.PushGuarantees, sender.NodeID, guarantee)
// verify that pusher.Engine handles any (potentially byzantine) input:
// A byzantine peer could target the collector node's pusher engine with messages
// The pusher should discard those and explicitly not get tricked into broadcasting
// collection guarantees which a byzantine peer might try to inject into the system.
sender := suite.identities.Filter(filter.HasRole[flow.Identity](flow.RoleVerification))[0]
err := suite.engine.Process(channels.PushGuarantees, sender.NodeID, guarantee)
suite.Require().NoError(err)

suite.Require().Error(err)

suite.conduit.AssertNumberOfCalls(suite.T(), "Multicast", 0)
Expand Down
6 changes: 0 additions & 6 deletions model/messages/collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@ import (
"github.com/onflow/flow-go/model/flow"
)

// SubmitCollectionGuarantee is a request to submit the given collection
// guarantee to consensus nodes. Only valid as a node-local message.
type SubmitCollectionGuarantee struct {
Guarantee flow.CollectionGuarantee
}

// CollectionRequest request all transactions from a collection with the given
// fingerprint.
type CollectionRequest struct {
Expand Down
25 changes: 11 additions & 14 deletions module/finalizer/collection/finalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@ import (

"github.com/dgraph-io/badger/v2"

"github.com/onflow/flow-go/engine/collection"
"github.com/onflow/flow-go/model/cluster"
"github.com/onflow/flow-go/model/flow"
"github.com/onflow/flow-go/model/messages"
"github.com/onflow/flow-go/module"
"github.com/onflow/flow-go/module/mempool"
"github.com/onflow/flow-go/network"
"github.com/onflow/flow-go/storage/badger/operation"
"github.com/onflow/flow-go/storage/badger/procedure"
)
Expand All @@ -22,21 +21,21 @@ import (
type Finalizer struct {
db *badger.DB
transactions mempool.Transactions
prov network.Engine
pusher collection.GuaranteedCollectionPublisher
metrics module.CollectionMetrics
}

// NewFinalizer creates a new finalizer for collection nodes.
func NewFinalizer(
db *badger.DB,
transactions mempool.Transactions,
prov network.Engine,
pusher collection.GuaranteedCollectionPublisher,
metrics module.CollectionMetrics,
) *Finalizer {
f := &Finalizer{
db: db,
transactions: transactions,
prov: prov,
pusher: pusher,
metrics: metrics,
}
return f
Expand Down Expand Up @@ -159,15 +158,13 @@ func (f *Finalizer) MakeFinal(blockID flow.Identifier) error {
// For now, we just use the parent signers as the guarantors of this
// collection.

// TODO add real signatures here (2711)
f.prov.SubmitLocal(&messages.SubmitCollectionGuarantee{
Guarantee: flow.CollectionGuarantee{
CollectionID: payload.Collection.ID(),
ReferenceBlockID: payload.ReferenceBlockID,
ChainID: header.ChainID,
SignerIndices: step.ParentVoterIndices,
Signature: nil, // TODO: to remove because it's not easily verifiable by consensus nodes
},
// TODO add real signatures here (https://github.com/onflow/flow-go-internal/issues/4569)
f.pusher.SubmitCollectionGuarantee(&flow.CollectionGuarantee{
CollectionID: payload.Collection.ID(),
ReferenceBlockID: payload.ReferenceBlockID,
ChainID: header.ChainID,
SignerIndices: step.ParentVoterIndices,
Signature: nil, // TODO: to remove because it's not easily verifiable by consensus nodes
})
}

Expand Down
Loading
Loading