From a41833c466d3f3d44047a88146866e5e10b875b2 Mon Sep 17 00:00:00 2001 From: Eugene Siegel Date: Thu, 16 Nov 2023 11:02:23 -0500 Subject: [PATCH 1/7] peer: send reestablish, shutdown messages before starting writeHandler This is to avoid a potential race on WriteMessage and Flush internals. Because there is no locking on WriteMessage and Flush, if we allow writeMessage calls in Start after the writeHandler has started, the writeMessage calls may call WriteMessage/Flush at the same time that writeMessage calls from the writeHandler does. Since there is no locking, internals like b.nextHeaderSend can race and cause panics. --- peer/brontide.go | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/peer/brontide.go b/peer/brontide.go index 1810f44c2b..46954e9cd8 100644 --- a/peer/brontide.go +++ b/peer/brontide.go @@ -640,33 +640,33 @@ func (p *Brontide) Start() error { p.startTime = time.Now() - p.wg.Add(5) - go p.queueHandler() - go p.writeHandler() - go p.readHandler() - go p.channelManager() - go p.pingHandler() - - // Signal to any external processes that the peer is now active. - close(p.activeSignal) - - // Now that the peer has started up, we send any channel sync messages - // that must be resent for borked channels. + // Before launching the writeHandler goroutine, we send any channel + // sync messages that must be resent for borked channels. We do this to + // avoid data races with WriteMessage & Flush calls. if len(msgs) > 0 { p.log.Infof("Sending %d channel sync messages to peer after "+ "loading active channels", len(msgs)) // Send the messages directly via writeMessage and bypass the - // writeHandler goroutine to avoid cases where writeHandler - // may exit and cause a deadlock. + // writeHandler goroutine. for _, msg := range msgs { if err := p.writeMessage(msg); err != nil { - return fmt.Errorf("unable to send reestablish"+ - "msg: %v", err) + return fmt.Errorf("unable to send "+ + "reestablish msg: %v", err) } } } + p.wg.Add(5) + go p.queueHandler() + go p.writeHandler() + go p.readHandler() + go p.channelManager() + go p.pingHandler() + + // Signal to any external processes that the peer is now active. + close(p.activeSignal) + // Node announcements don't propagate very well throughout the network // as there isn't a way to efficiently query for them through their // timestamp, mostly affecting nodes that were offline during the time @@ -2025,6 +2025,12 @@ func (p *Brontide) logWireMessage(msg lnwire.Message, read bool) { // message buffered on the connection. It is safe to call this method again // with a nil message iff a timeout error is returned. This will continue to // flush the pending message to the wire. +// +// NOTE: +// Besides its usage in Start, this function should not be used elsewhere +// except in writeHandler. If multiple goroutines call writeMessage at the same +// time, panics can occur because WriteMessage and Flush don't use any locking +// internally. func (p *Brontide) writeMessage(msg lnwire.Message) error { // Only log the message on the first attempt. if msg != nil { From 2988c85479da3d9fc5a6d79d455da43923519d35 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Thu, 16 Nov 2023 13:07:49 -0800 Subject: [PATCH 2/7] docs/release-notes: add release notes for v0.17.2 --- docs/release-notes/release-notes-0.17.2.md | 50 ++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 docs/release-notes/release-notes-0.17.2.md diff --git a/docs/release-notes/release-notes-0.17.2.md b/docs/release-notes/release-notes-0.17.2.md new file mode 100644 index 0000000000..4d68f76c61 --- /dev/null +++ b/docs/release-notes/release-notes-0.17.2.md @@ -0,0 +1,50 @@ +# Release Notes +- [Bug Fixes](#bug-fixes) +- [New Features](#new-features) + - [Functional Enhancements](#functional-enhancements) + - [RPC Additions](#rpc-additions) + - [lncli Additions](#lncli-additions) +- [Improvements](#improvements) + - [Functional Updates](#functional-updates) + - [RPC Updates](#rpc-updates) + - [lncli Updates](#lncli-updates) + - [Breaking Changes](#breaking-changes) + - [Performance Improvements](#performance-improvements) + - [Technical and Architectural Updates](#technical-and-architectural-updates) + - [BOLT Spec Updates](#bolt-spec-updates) + - [Testing](#testing) + - [Database](#database) + - [Code Health](#code-health) + - [Tooling and Documentation](#tooling-and-documentation) + +# Bug Fixes + +* A bug that would cause the peer goroutine to panic if SCB related messages + needed to be retransmitted [has been + fixed](https://github.com/lightningnetwork/lnd/pull/8186). + +# New Features +## Functional Enhancements + +## RPC Additions + + +## lncli Additions + +# Improvements +## Functional Updates +## RPC Updates +## lncli Updates +## Code Health +## Breaking Changes +## Performance Improvements + +# Technical and Architectural Updates +## BOLT Spec Updates +## Testing +## Database +## Code Health +## Tooling and Documentation + +# Contributors (Alphabetical Order) +* Eugene Siegel From 0f5e4077ba88c0739acf61c411ff38f459caf72e Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Thu, 16 Nov 2023 17:31:18 -0600 Subject: [PATCH 3/7] peer: add missing Close() method to mockMessageConn --- peer/test_utils.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/peer/test_utils.go b/peer/test_utils.go index add15cf19d..5e288c4d39 100644 --- a/peer/test_utils.go +++ b/peer/test_utils.go @@ -561,3 +561,7 @@ func (m *mockMessageConn) RemoteAddr() net.Addr { func (m *mockMessageConn) LocalAddr() net.Addr { return nil } + +func (m *mockMessageConn) Close() error { + return nil +} From a283fc62fa15bc4fe8ecf00e17124292f768f543 Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Thu, 16 Nov 2023 17:32:19 -0600 Subject: [PATCH 4/7] peer: enable mockMessageConn to detect data races We use unsynchronized counters to trigger a report under the race detector if multiple reads or writes happen concurrently. --- peer/test_utils.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/peer/test_utils.go b/peer/test_utils.go index 5e288c4d39..c7509c0c82 100644 --- a/peer/test_utils.go +++ b/peer/test_utils.go @@ -497,6 +497,16 @@ type mockMessageConn struct { readMessages chan []byte curReadMessage []byte + + // writeRaceDetectingCounter is incremented on any function call + // associated with writing to the connection. The race detector will + // trigger on this counter if a data race exists. + writeRaceDetectingCounter int + + // readRaceDetectingCounter is incremented on any function call + // associated with reading from the connection. The race detector will + // trigger on this counter if a data race exists. + readRaceDetectingCounter int } func newMockConn(t *testing.T, expectedMessages int) *mockMessageConn { @@ -509,17 +519,20 @@ func newMockConn(t *testing.T, expectedMessages int) *mockMessageConn { // SetWriteDeadline mocks setting write deadline for our conn. func (m *mockMessageConn) SetWriteDeadline(time.Time) error { + m.writeRaceDetectingCounter++ return nil } // Flush mocks a message conn flush. func (m *mockMessageConn) Flush() (int, error) { + m.writeRaceDetectingCounter++ return 0, nil } // WriteMessage mocks sending of a message on our connection. It will push // the bytes sent into the mock's writtenMessages channel. func (m *mockMessageConn) WriteMessage(msg []byte) error { + m.writeRaceDetectingCounter++ select { case m.writtenMessages <- msg: case <-time.After(timeout): @@ -542,15 +555,18 @@ func (m *mockMessageConn) assertWrite(expected []byte) { } func (m *mockMessageConn) SetReadDeadline(t time.Time) error { + m.readRaceDetectingCounter++ return nil } func (m *mockMessageConn) ReadNextHeader() (uint32, error) { + m.readRaceDetectingCounter++ m.curReadMessage = <-m.readMessages return uint32(len(m.curReadMessage)), nil } func (m *mockMessageConn) ReadNextBody(buf []byte) ([]byte, error) { + m.readRaceDetectingCounter++ return m.curReadMessage, nil } From a331308a02c3a900e24e909ec563671842ffeacb Mon Sep 17 00:00:00 2001 From: Matt Morehouse Date: Thu, 16 Nov 2023 17:33:44 -0600 Subject: [PATCH 5/7] peer: add test for startup race on writeMessage The test reliably detects https://github.com/lightningnetwork/lnd/issues/8184. --- peer/brontide_test.go | 106 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/peer/brontide_test.go b/peer/brontide_test.go index 428ea934e8..103b59ea53 100644 --- a/peer/brontide_test.go +++ b/peer/brontide_test.go @@ -1340,3 +1340,109 @@ func TestHandleRemovePendingChannel(t *testing.T) { }) } } + +// TestStartupWriteMessageRace checks that no data race occurs when starting up +// a peer with an existing channel, while an outgoing message is queuing. Such +// a race occurred in https://github.com/lightningnetwork/lnd/issues/8184, where +// a channel reestablish message raced with another outgoing message. +// +// Note that races will only be detected with the Go race detector enabled. +func TestStartupWriteMessageRace(t *testing.T) { + t.Parallel() + + // Set up parameters for createTestPeer. + notifier := &mock.ChainNotifier{ + SpendChan: make(chan *chainntnfs.SpendDetail), + EpochChan: make(chan *chainntnfs.BlockEpoch), + ConfChan: make(chan *chainntnfs.TxConfirmation), + } + broadcastTxChan := make(chan *wire.MsgTx) + mockSwitch := &mockMessageSwitch{} + + // Use a callback to extract the channel created by createTestPeer, so + // we can mark it borked below. We can't mark it borked within the + // callback, since the channel hasn't been saved to the DB yet when the + // callback executes. + var channel *channeldb.OpenChannel + getChannels := func(a, b *channeldb.OpenChannel) { + channel = a + } + + // createTestPeer creates a peer and a channel with that peer. + peer, _, err := createTestPeer( + t, notifier, broadcastTxChan, getChannels, mockSwitch, + ) + require.NoError(t, err, "unable to create test channel") + + // Avoid the need to mock the channel graph by marking the channel + // borked. Borked channels still get a reestablish message sent on + // reconnect, while skipping channel graph checks and link creation. + require.NoError(t, channel.MarkBorked()) + + // Use a mock conn to detect read/write races on the conn. + mockConn := newMockConn(t, 2) + peer.cfg.Conn = mockConn + + // Set up other configuration necessary to successfully execute + // peer.Start(). + peer.cfg.LegacyFeatures = lnwire.EmptyFeatureVector() + writeBufferPool := pool.NewWriteBuffer( + pool.DefaultWriteBufferGCInterval, + pool.DefaultWriteBufferExpiryInterval, + ) + writePool := pool.NewWrite( + writeBufferPool, 1, timeout, + ) + require.NoError(t, writePool.Start()) + peer.cfg.WritePool = writePool + readBufferPool := pool.NewReadBuffer( + pool.DefaultReadBufferGCInterval, + pool.DefaultReadBufferExpiryInterval, + ) + readPool := pool.NewRead( + readBufferPool, 1, timeout, + ) + require.NoError(t, readPool.Start()) + peer.cfg.ReadPool = readPool + + // Send a message while starting the peer. As the peer starts up, it + // should not trigger a data race between the sending of this message + // and the sending of the channel reestablish message. + sendPingDone := make(chan struct{}) + go func() { + require.NoError(t, peer.SendMessage(true, lnwire.NewPing(0))) + close(sendPingDone) + }() + + // Handle init messages. + go func() { + // Read init message. + <-mockConn.writtenMessages + + // Write the init reply message. + initReplyMsg := lnwire.NewInitMessage( + lnwire.NewRawFeatureVector( + lnwire.DataLossProtectRequired, + ), + lnwire.NewRawFeatureVector(), + ) + var b bytes.Buffer + _, err = lnwire.WriteMessage(&b, initReplyMsg, 0) + require.NoError(t, err) + + mockConn.readMessages <- b.Bytes() + }() + + // Start the peer. No data race should occur. + require.NoError(t, peer.Start()) + + // Ensure messages were sent during startup. + <-sendPingDone + for i := 0; i < 2; i++ { + select { + case <-mockConn.writtenMessages: + default: + t.Fatalf("Failed to send all messages during startup") + } + } +} From 01761ab99ba74d13024195e68580cb7ceca895f2 Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Fri, 17 Nov 2023 18:09:17 -0600 Subject: [PATCH 6/7] docs/release-notes: update release notes for v0.17.2 --- docs/release-notes/release-notes-0.17.2.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/release-notes/release-notes-0.17.2.md b/docs/release-notes/release-notes-0.17.2.md index 4d68f76c61..490dbbcf55 100644 --- a/docs/release-notes/release-notes-0.17.2.md +++ b/docs/release-notes/release-notes-0.17.2.md @@ -48,3 +48,4 @@ # Contributors (Alphabetical Order) * Eugene Siegel +* Matt Morehouse From 0aaf144b768d61255fc6909c75a0e1604b66471e Mon Sep 17 00:00:00 2001 From: Olaoluwa Osuntokun Date: Fri, 17 Nov 2023 18:09:34 -0600 Subject: [PATCH 7/7] build: bump version to v0.17.2-beta.rc2 --- build/version.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/version.go b/build/version.go index 840c58a8a9..74c31c0f6a 100644 --- a/build/version.go +++ b/build/version.go @@ -43,11 +43,11 @@ const ( AppMinor uint = 17 // AppPatch defines the application patch for this binary. - AppPatch uint = 1 + AppPatch uint = 2 // AppPreRelease MUST only contain characters from semanticAlphabet per // the semantic versioning spec. - AppPreRelease = "beta" + AppPreRelease = "beta.rc1" ) func init() {