Skip to content

Commit

Permalink
MB-35073: Release StreamContainer lock before calling Stream::setDead
Browse files Browse the repository at this point in the history
TSan found a lock-order inversion: DcpProducer::closeAllStreams() holds
`StreamContainer->wlock()` and then acquires `vb->getStateLock()`,
whereas `VBucket::set()` acquires them in the opposite order.

Release the stream container lock before calling `Stream::setDead()` to
avoid holding both in the `closeAllStreams()` path.

Also, preemptively apply the same change to `setStreamDeadStatus`,
though TSan has not identified an inversion in this case.

TSan report:
[ RUN      ] DurabilityTest.MB34780
==================
WARNING: ThreadSanitizer: lock-order-inversion (potential deadlock) (pid=16422)
  Cycle in lock order graph: M3987 (0x7b68000308f8) => M225878274331574312 (0x000000000000) => M3987

  Mutex M225878274331574312 acquired here while holding mutex M3987 in thread T7:
    #0 pthread_rwlock_rdlock <null> (libtsan.so.0+0x00000002953b)
    #1 cb_rw_reader_enter(pthread_rwlock_t*) .../platform/src/cb_pthreads.cc:195 (libplatform_so.so.0.1.0+0x000000009cfa)
    #2 cb::RWLock::readerLock() .../platform/include/platform/rwlock.h:87 (ep.so+0x0000000f423a)
    #3 cb::RWLock::lock_shared() .../platform/include/platform/rwlock.h:67 (ep.so+0x00000012f578)
    #4 std::shared_lock<cb::RWLock>::shared_lock(cb::RWLock&) /usr/local/include/c++/7.3.0/shared_mutex:553 (ep.so+0x00000012f578)
    #5 StreamContainer<std::shared_ptr<Stream> >::ReadLockedHandle::ReadLockedHandle(StreamContainer<std::shared_ptr<Stream> > const&) .../kv_engine/engines/ep/src/dcp/stream_container.h:213 (ep.so+0x00000012f578)
    #6 StreamContainer<std::shared_ptr<Stream> >::rlock() const .../kv_engine/engines/ep/src/dcp/stream_container.h:273 (ep.so+0x000000122ea7)
    #7 DcpProducer::notifySeqnoAvailable(Vbid, unsigned long) .../kv_engine/engines/ep/src/dcp/producer.cc:1312 (ep.so+0x000000122ea7)
    #8 DcpConnMap::notifyVBConnections(Vbid, unsigned long) .../kv_engine/engines/ep/src/dcp/dcpconnmap.cc:424 (ep.so+0x0000000fa071)
    #9 KVBucket::notifyReplication(Vbid, long) .../kv_engine/engines/ep/src/kv_bucket.cc:2570 (ep.so+0x000000210711)
    #10 EPBucket::notifyNewSeqno(Vbid, VBNotifyCtx const&) .../kv_engine/engines/ep/src/ep_bucket.cc:1327 (ep.so+0x00000016232b)
    #11 NotifyNewSeqnoCB::callback(Vbid const&, VBNotifyCtx const&) .../kv_engine/engines/ep/src/kv_bucket.h:837 (ep.so+0x0000002267d9)
    #12 VBucket::notifyNewSeqno(VBNotifyCtx const&) .../kv_engine/engines/ep/src/vbucket.cc:3631 (ep.so+0x000000264871)
    #13 VBucket::set() .../kv_engine/engines/ep/src/vbucket.cc:1568 (ep.so+0x00000026c768)
    #14 KVBucket::set() .../kv_engine/engines/ep/src/kv_bucket.cc:692 (ep.so+0x000000220856)
    #15 EventuallyPersistentEngine::storeIfInner() .../kv_engine/engines/ep/src/ep_engine.cc:2440 (ep.so+0x000000181fef)

  Mutex M3987 previously acquired by the same thread here:
    #0 AnnotateRWLockAcquired <null> (libtsan.so.0+0x00000005b63d)
    #1 folly::detail::annotate_rwlock_acquired_impl(void const volatile*, folly::annotate_rwlock_level, char const*, int) .../folly/follytsan-prefix/src/follytsan/folly/synchronization/SanitizeThread.cpp:91 (memcached+0x0000006463de)
    #2 annotate_rwlock_acquired .../build/tlm/deps/folly.exploded/include/folly/synchronization/SanitizeThread.h:99 (ep.so+0x000000220340)
    #3 folly::SharedMutexImpl<false, void, std::atomic, false, true>::annotateAcquired(folly::annotate_rwlock_level) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:696 (ep.so+0x000000220340)
    #4 folly::SharedMutexImpl<false, void, std::atomic, false, true>::lock_shared(folly::SharedMutexToken&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:376 (ep.so+0x000000220340)
    #5 folly::SharedMutexImpl<false, void, std::atomic, false, true>::ReadHolder::ReadHolder(folly::SharedMutexImpl<false, void, std::atomic, false, true> const&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:1315 (ep.so+0x000000220340)
    #6 KVBucket::set() .../kv_engine/engines/ep/src/kv_bucket.cc:659 (ep.so+0x000000220340)
    #7 EventuallyPersistentEngine::storeIfInner() .../kv_engine/engines/ep/src/ep_engine.cc:2440 (ep.so+0x000000181fef)

  Mutex M3987 acquired here while holding mutex M225878274331574312 in thread T5:
    #0 AnnotateRWLockAcquired <null> (libtsan.so.0+0x00000005b63d)
    #1 folly::detail::annotate_rwlock_acquired_impl(void const volatile*, folly::annotate_rwlock_level, char const*, int) .../folly/follytsan-prefix/src/follytsan/folly/synchronization/SanitizeThread.cpp:91 (memcached+0x0000006463de)
    #2 annotate_rwlock_acquired .../build/tlm/deps/folly.exploded/include/folly/synchronization/SanitizeThread.h:99 (ep.so+0x0000000bb626)
    #3 folly::SharedMutexImpl<false, void, std::atomic, false, true>::annotateAcquired(folly::annotate_rwlock_level) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:696 (ep.so+0x0000000bb626)
    #4 folly::SharedMutexImpl<false, void, std::atomic, false, true>::lock_shared(folly::SharedMutexToken&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:376 (ep.so+0x0000000bb626)
    #5 folly::SharedMutexImpl<false, void, std::atomic, false, true>::ReadHolder::ReadHolder(folly::SharedMutexImpl<false, void, std::atomic, false, true> const&) .../build/tlm/deps/folly.exploded/include/folly/SharedMutex.h:1315 (ep.so+0x0000000bb626)
    #6 ActiveStream::setDead(end_stream_status_t) .../kv_engine/engines/ep/src/dcp/active_stream.cc:1181 (ep.so+0x0000000bb626)
    #7 operator() .../kv_engine/engines/ep/src/dcp/producer.cc:1383 (ep.so+0x0000001257d1)
    #8 for_each<...> /usr/local/include/c++/7.3.0/bits/stl_algo.h:3884 (ep.so+0x0000001257d1)
    #9 DcpProducer::closeAllStreams() .../kv_engine/engines/ep/src/dcp/producer.cc:1377 (ep.so+0x000000125c00)

  Mutex M225878274331574312 previously acquired by the same thread here:
    #0 pthread_rwlock_wrlock <null> (libtsan.so.0+0x0000000297eb)
    #1 cb_rw_writer_enter(pthread_rwlock_t*) .../platform/src/cb_pthreads.cc:217 (libplatform_so.so.0.1.0+0x000000009e80)
    #2 cb::RWLock::writerLock() .../platform/include/platform/rwlock.h:103 (ep.so+0x000000125597)
    #3 cb::RWLock::lock() .../platform/include/platform/rwlock.h:77 (ep.so+0x000000125597)
    #4 std::unique_lock<cb::RWLock>::lock() /usr/local/include/c++/7.3.0/bits/std_mutex.h:267 (ep.so+0x000000125597)
    #5 std::unique_lock<cb::RWLock>::unique_lock(cb::RWLock&) /usr/local/include/c++/7.3.0/bits/std_mutex.h:197 (ep.so+0x000000125597)
    #6 StreamContainer<std::shared_ptr<Stream> >::WriteLockedHandle::WriteLockedHandle(StreamContainer<std::shared_ptr<Stream> >&) .../kv_engine/engines/ep/src/dcp/stream_container.h:237 (ep.so+0x000000125597)
    #7 StreamContainer<std::shared_ptr<Stream> >::wlock() .../kv_engine/engines/ep/src/dcp/stream_container.h:277 (ep.so+0x000000125597)
    #8 operator() .../kv_engine/engines/ep/src/dcp/producer.cc:1381 (ep.so+0x000000125597)
    #9 for_each<...> /usr/local/include/c++/7.3.0/bits/stl_algo.h:3884 (ep.so+0x000000125597)
    #10 DcpProducer::closeAllStreams() .../kv_engine/engines/ep/src/dcp/producer.cc:1377 (ep.so+0x000000125c00)

Change-Id: Icc15e74e80d7f1926ce6c75bbdd8aa1c43f5ca2c
Reviewed-on: http://review.couchbase.org/111989
Reviewed-by: Dave Rigby <[email protected]>
Tested-by: Dave Rigby <[email protected]>
  • Loading branch information
jameseh96 authored and daverigby committed Jul 17, 2019
1 parent 45c199e commit f1dc64e
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions engines/ep/src/dcp/producer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1358,12 +1358,20 @@ bool DcpProducer::setStreamDeadStatus(Vbid vbid,
end_stream_status_t status) {
auto rv = streams.find(vbid.get());
if (rv != streams.end()) {
for (auto handle = rv->second->rlock(); !handle.end(); handle.next()) {
if (handle.get()->compareStreamId(sid)) {
handle.get()->setDead(status);
return true;
std::shared_ptr<Stream> streamPtr;
// MB-35073: holding StreamContainer rlock while calling setDead
// has been seen to cause lock inversion elsewhere.
// Collect sharedptr then setDead once lock is released (itr out of
// scope).
for (auto itr = rv->second->rlock(); !itr.end(); itr.next()) {
if (itr.get()->compareStreamId(sid)) {
streamPtr = itr.get();
break;
}
}
if (streamPtr) {
streamPtr->setDead(status);
}
return true;
}

Expand All @@ -1378,12 +1386,22 @@ void DcpProducer::closeAllStreams() {
streams.end(),
[&vbvector](StreamsMap::value_type& vt) {
vbvector.push_back((Vbid)vt.first);
auto handle = vt.second->wlock();
while (!handle.end()) {
handle.get()->setDead(END_STREAM_DISCONNECTED);
handle.next();
std::vector<std::shared_ptr<Stream>> streamPtrs;
// MB-35073: holding StreamContainer lock while
// calling setDead leads to lock inversion - so
// collect sharedptrs in one pass then setDead once
// lock is released (itr out of scope).
{
auto handle = vt.second->wlock();
for (; !handle.end(); handle.next()) {
streamPtrs.push_back(handle.get());
}
handle.clear();
}

for (auto streamPtr : streamPtrs) {
streamPtr->setDead(END_STREAM_DISCONNECTED);
}
handle.clear();
});
}
for (const auto vbid: vbvector) {
Expand Down

0 comments on commit f1dc64e

Please sign in to comment.