Source file src/runtime/testdata/testgoroutineleakprofile/goker/cockroach10214.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a MIT
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6   * Project: cockroach
     7   * Issue or PR  : https://github.com/cockroachdb/cockroach/pull/10214
     8   * Buggy version: 7207111aa3a43df0552509365fdec741a53f873f
     9   * fix commit-id: 27e863d90ab0660494778f1c35966cc5ddc38e32
    10   * Flaky: 3/100
    11   * Description: This goroutine leak is caused by different order when acquiring
    12   * coalescedMu.Lock() and raftMu.Lock(). The fix is to refactor sendQueuedHeartbeats()
    13   * so that cockroachdb can unlock coalescedMu before locking raftMu.
    14   */
    15  package main
    16  
    17  import (
    18  	"os"
    19  	"runtime/pprof"
    20  	"sync"
    21  	"time"
    22  	"unsafe"
    23  )
    24  
// init passes the Cockroach10214 scenario to register under the name
// "Cockroach10214". register is declared elsewhere in this package.
func init() {
	register("Cockroach10214", Cockroach10214)
}
    28  
// Store_cockroach10214 is the reduced store used by this leak scenario. It
// carries the L1 mutex together with the queued heartbeat responses, plus the
// set of replicas visited by sendQueuedHeartbeatsToNode.
type Store_cockroach10214 struct {
	// coalescedMu embeds the L1 mutex next to the heartbeat responses.
	coalescedMu struct {
		sync.Mutex // L1
		heartbeatResponses []int
	}
	// mu groups the replicas, keyed by int. Despite its name, this struct
	// contains no mutex in this reduced model.
	mu struct {
		replicas map[int]*Replica_cockroach10214
	}
}
    38  
    39  func (s *Store_cockroach10214) sendQueuedHeartbeats() {
    40  	s.coalescedMu.Lock() // L1 acquire
    41  	defer s.coalescedMu.Unlock() // L2 release
    42  	for i := 0; i < len(s.coalescedMu.heartbeatResponses); i++ {
    43  		s.sendQueuedHeartbeatsToNode() // L2
    44  	}
    45  }
    46  
    47  func (s *Store_cockroach10214) sendQueuedHeartbeatsToNode() {
    48  	for i := 0; i < len(s.mu.replicas); i++ {
    49  		r := s.mu.replicas[i]
    50  		r.reportUnreachable() // L2
    51  	}
    52  }
    53  
// Replica_cockroach10214 is the reduced replica used by this leak scenario.
// It owns two mutexes (L2 and L3) and points back at the store whose
// coalescedMu (L1) it locks from maybeCoalesceHeartbeat.
type Replica_cockroach10214 struct {
	raftMu sync.Mutex // L2
	mu     sync.Mutex // L3
	store  *Store_cockroach10214
}
    59  
    60  func (r *Replica_cockroach10214) reportUnreachable() {
    61  	r.raftMu.Lock() // L2 acquire
    62  	time.Sleep(time.Millisecond)
    63  	defer r.raftMu.Unlock() // L2 release
    64  }
    65  
// tick locks raftMu (L2) and, with it held, runs tickRaftMuLocked, which
// locks mu (L3) and then reaches maybeCoalesceHeartbeat, where the store's
// coalescedMu (L1) is locked. This path therefore acquires locks in the order
// L2 -> L3 -> L1.
func (r *Replica_cockroach10214) tick() {
	r.raftMu.Lock() // L2 acquire
	defer r.raftMu.Unlock() // L2 release
	r.tickRaftMuLocked()
}
    71  
    72  func (r *Replica_cockroach10214) tickRaftMuLocked() {
    73  	r.mu.Lock() // L3 acquire
    74  	defer r.mu.Unlock() // L3 release
    75  	if r.maybeQuiesceLocked() {
    76  		return
    77  	}
    78  }
    79  
    80  func (r *Replica_cockroach10214) maybeQuiesceLocked() bool {
    81  	for i := 0; i < 2; i++ {
    82  		if !r.maybeCoalesceHeartbeat() {
    83  			return true
    84  		}
    85  	}
    86  	return false
    87  }
    88  
    89  func (r *Replica_cockroach10214) maybeCoalesceHeartbeat() bool {
    90  	msgtype := uintptr(unsafe.Pointer(r)) % 3
    91  	switch msgtype {
    92  	case 0, 1, 2:
    93  		r.store.coalescedMu.Lock() // L1 acquire
    94  	default:
    95  		return false
    96  	}
    97  	r.store.coalescedMu.Unlock() // L1 release
    98  	return true
    99  }
   100  
   101  func Cockroach10214() {
   102  	prof := pprof.Lookup("goroutineleak")
   103  	defer func() {
   104  		time.Sleep(100 * time.Millisecond)
   105  		prof.WriteTo(os.Stdout, 2)
   106  	}()
   107  	for i := 0; i < 1000; i++ {
   108  		go func() {
   109  			store := &Store_cockroach10214{}
   110  			responses := &store.coalescedMu.heartbeatResponses
   111  			*responses = append(*responses, 1, 2)
   112  			store.mu.replicas = make(map[int]*Replica_cockroach10214)
   113  
   114  			rp1 := &Replica_cockroach10214{ // L2,3[0]
   115  				store: store,
   116  			}
   117  			rp2 := &Replica_cockroach10214{ // L2,3[1]
   118  				store: store,
   119  			}
   120  			store.mu.replicas[0] = rp1
   121  			store.mu.replicas[1] = rp2
   122  
   123  			go store.sendQueuedHeartbeats() // G1
   124  			go rp1.tick()                   // G2
   125  		}()
   126  	}
   127  }
   128  
   129  // Example of goroutine leak trace:
   130  //
   131  // G1                                      G2
   132  //------------------------------------------------------------------------------------
   133  // s.sendQueuedHeartbeats()                .
   134  // s.coalescedMu.Lock() [L1]               .
   135  // s.sendQueuedHeartbeatsToNode()          .
   136  // s.mu.replicas[0].reportUnreachable()    .
   137  // s.mu.replicas[0].raftMu.Lock() [L2]     .
   138  // .                                       s.mu.replicas[0].tick()
   139  // .                                       s.mu.replicas[0].raftMu.Lock() [L2]
   140  // .                                       s.mu.replicas[0].tickRaftMuLocked()
   141  // .                                       s.mu.replicas[0].mu.Lock() [L3]
   142  // .                                       s.mu.replicas[0].maybeQuiesceLocked()
   143  // .                                       s.mu.replicas[0].maybeCoalesceHeartbeat()
   144  // .                                       s.coalescedMu.Lock() [L1]
   145  //--------------------------------G1,G2 leak------------------------------------------

View as plain text