Source file src/runtime/goroutineleakprofile_test.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"fmt"
     9  	"internal/testenv"
    10  	"os"
    11  	"regexp"
    12  	"strings"
    13  	"testing"
    14  )
    15  
    16  func TestGoroutineLeakProfile(t *testing.T) {
    17  	if strings.Contains(os.Getenv("GOFLAGS"), "mayMoreStackPreempt") {
    18  		// Some tests have false negatives under mayMoreStackPreempt. This may be a test-only issue,
    19  		// but needs more investigation.
    20  		testenv.SkipFlaky(t, 75729)
    21  	}
    22  
    23  	// Goroutine leak test case.
    24  	//
    25  	// Test cases can be configured with test name, the name of the entry point function,
    26  	// a set of expected leaks identified by regular expressions, and the number of times
    27  	// the test should be repeated.
    28  	//
    29  	// Repeated runs reduce flakiness in some tests.
    30  	type testCase struct {
    31  		name          string
    32  		simple        bool
    33  		repetitions   int
    34  		expectedLeaks map[*regexp.Regexp]bool
    35  
    36  		// flakyLeaks are goroutine leaks that are too flaky to be reliably detected.
    37  		// Still, they might pop up every once in a while. The test will pass regardless
    38  		// if they occur or nor, as they are not unexpected.
    39  		//
    40  		// Note that all flaky leaks are true positives, i.e. real goroutine leaks,
    41  		// and it is only their detection that is unreliable due to scheduling
    42  		// non-determinism.
    43  		flakyLeaks map[*regexp.Regexp]struct{}
    44  	}
    45  
    46  	// makeAnyTest is a short-hand for creating test cases.
    47  	// Each of the leaks in the list is identified by a regular expression.
    48  	// If a leak is flaky, it is added to the flakyLeaks map.
    49  	makeAnyTest := func(name string, flaky bool, repetitions int, leaks ...string) testCase {
    50  		tc := testCase{
    51  			name:          name,
    52  			expectedLeaks: make(map[*regexp.Regexp]bool, len(leaks)),
    53  			flakyLeaks:    make(map[*regexp.Regexp]struct{}, len(leaks)),
    54  			// Make sure the test is repeated at least once.
    55  			repetitions: repetitions | 1,
    56  		}
    57  
    58  		for _, leak := range leaks {
    59  			if !flaky {
    60  				tc.expectedLeaks[regexp.MustCompile(leak)] = false
    61  			} else {
    62  				tc.flakyLeaks[regexp.MustCompile(leak)] = struct{}{}
    63  			}
    64  		}
    65  
    66  		return tc
    67  	}
    68  
    69  	// makeTest is a short-hand for creating non-flaky test cases.
    70  	makeTest := func(name string, leaks ...string) testCase {
    71  		tcase := makeAnyTest(name, false, 2, leaks...)
    72  		tcase.simple = true
    73  		return tcase
    74  	}
    75  
    76  	// makeFlakyTest is a short-hand for creating flaky test cases.
    77  	makeFlakyTest := func(name string, leaks ...string) testCase {
    78  		if testing.Short() {
    79  			return makeAnyTest(name, true, 2, leaks...)
    80  		}
    81  		return makeAnyTest(name, true, 10, leaks...)
    82  	}
    83  
    84  	goroutineHeader := regexp.MustCompile(`goroutine \d+ \[`)
    85  
    86  	// extractLeaks takes the output of a test and splits it into a
    87  	// list of strings denoting goroutine leaks.
    88  	//
    89  	// If the input is:
    90  	//
    91  	// goroutine 1 [wait reason (leaked)]:
    92  	// main.leaked()
    93  	// 	./testdata/testgoroutineleakprofile/foo.go:37 +0x100
    94  	// created by main.main()
    95  	// 	./testdata/testgoroutineleakprofile/main.go:10 +0x20
    96  	//
    97  	// goroutine 2 [wait reason (leaked)]:
    98  	// main.leaked2()
    99  	// 	./testdata/testgoroutineleakprofile/foo.go:37 +0x100
   100  	// created by main.main()
   101  	// 	./testdata/testgoroutineleakprofile/main.go:10 +0x20
   102  	//
   103  	// The output is (as a list of strings):
   104  	//
   105  	// leaked() [wait reason]
   106  	// leaked2() [wait reason]
   107  	extractLeaks := func(output string) []string {
   108  		stacks := strings.Split(output, "\n\ngoroutine")
   109  		var leaks []string
   110  		for _, stack := range stacks {
   111  			lines := strings.Split(stack, "\n")
   112  			if len(lines) < 5 {
   113  				// Expecting at least the following lines (where n=len(lines)-1):
   114  				//
   115  				// [0] goroutine n [wait reason (leaked)]
   116  				// ...
   117  				// [n-3] bottom.leak.frame(...)
   118  				// [n-2]  ./bottom/leak/frame/source.go:line
   119  				// [n-1] created by go.instruction()
   120  				// [n] 	  ./go/instruction/source.go:line
   121  				continue
   122  			}
   123  
   124  			if !strings.Contains(lines[0], "(leaked)") {
   125  				// Ignore non-leaked goroutines.
   126  				continue
   127  			}
   128  
   129  			// Get the wait reason from the goroutine header.
   130  			header := lines[0]
   131  			waitReason := goroutineHeader.ReplaceAllString(header, "[")
   132  			waitReason = strings.ReplaceAll(waitReason, " (leaked)", "")
   133  
   134  			// Get the function name from the stack trace (should be two lines above `created by`).
   135  			var funcName string
   136  			for i := len(lines) - 1; i >= 0; i-- {
   137  				if strings.Contains(lines[i], "created by") {
   138  					funcName = strings.TrimPrefix(lines[i-2], "main.")
   139  					break
   140  				}
   141  			}
   142  			if funcName == "" {
   143  				t.Fatalf("failed to extract function name from stack trace: %s", lines)
   144  			}
   145  
   146  			leaks = append(leaks, funcName+" "+waitReason)
   147  		}
   148  		return leaks
   149  	}
   150  
   151  	// Micro tests involve very simple leaks for each type of concurrency primitive operation.
   152  	microTests := []testCase{
   153  		makeTest("NilRecv",
   154  			`NilRecv\.func1\(.* \[chan receive \(nil chan\)\]`,
   155  		),
   156  		makeTest("NilSend",
   157  			`NilSend\.func1\(.* \[chan send \(nil chan\)\]`,
   158  		),
   159  		makeTest("SelectNoCases",
   160  			`SelectNoCases\.func1\(.* \[select \(no cases\)\]`,
   161  		),
   162  		makeTest("ChanRecv",
   163  			`ChanRecv\.func1\(.* \[chan receive\]`,
   164  		),
   165  		makeTest("ChanSend",
   166  			`ChanSend\.func1\(.* \[chan send\]`,
   167  		),
   168  		makeTest("Select",
   169  			`Select\.func1\(.* \[select\]`,
   170  		),
   171  		makeTest("WaitGroup",
   172  			`WaitGroup\.func1\(.* \[sync\.WaitGroup\.Wait\]`,
   173  		),
   174  		makeTest("MutexStack",
   175  			`MutexStack\.func1\(.* \[sync\.Mutex\.Lock\]`,
   176  		),
   177  		makeTest("MutexHeap",
   178  			`MutexHeap\.func1.1\(.* \[sync\.Mutex\.Lock\]`,
   179  		),
   180  		makeTest("Cond",
   181  			`Cond\.func1\(.* \[sync\.Cond\.Wait\]`,
   182  		),
   183  		makeTest("RWMutexRLock",
   184  			`RWMutexRLock\.func1\(.* \[sync\.RWMutex\.RLock\]`,
   185  		),
   186  		makeTest("RWMutexLock",
   187  			`RWMutexLock\.func1\(.* \[sync\.(RW)?Mutex\.Lock\]`,
   188  		),
   189  		makeTest("Mixed",
   190  			`Mixed\.func1\(.* \[sync\.WaitGroup\.Wait\]`,
   191  			`Mixed\.func1.1\(.* \[chan send\]`,
   192  		),
   193  		makeTest("NoLeakGlobal"),
   194  	}
   195  
   196  	// Stress tests are flaky and we do not strictly care about their output.
   197  	// They are only intended to stress the goroutine leak detector and profiling
   198  	// infrastructure in interesting ways.
   199  	stressTestCases := []testCase{
   200  		makeFlakyTest("SpawnGC",
   201  			`spawnGC.func1\(.* \[chan receive\]`,
   202  		),
   203  		makeTest("DaisyChain"),
   204  	}
   205  
   206  	// Common goroutine leak patterns.
   207  	// Extracted from "Unveiling and Vanquishing Goroutine Leaks in Enterprise Microservices: A Dynamic Analysis Approach"
   208  	// doi:10.1109/CGO57630.2024.10444835
   209  	patternTestCases := []testCase{
   210  		makeTest("NoCloseRange",
   211  			`noCloseRange\(.* \[chan send\]`,
   212  			`noCloseRange\.func1\(.* \[chan receive\]`,
   213  		),
   214  		makeTest("MethodContractViolation",
   215  			`worker\.Start\.func1\(.* \[select\]`,
   216  		),
   217  		makeTest("DoubleSend",
   218  			`DoubleSend\.func3\(.* \[chan send\]`,
   219  		),
   220  		makeTest("EarlyReturn",
   221  			`earlyReturn\.func1\(.* \[chan send\]`,
   222  		),
   223  		makeTest("NCastLeak",
   224  			`nCastLeak\.func1\(.* \[chan send\]`,
   225  			`NCastLeak\.func2\(.* \[chan receive\]`,
   226  		),
   227  		makeTest("Timeout",
   228  			// (vsaioc): Timeout is *theoretically* flaky, but the
   229  			// pseudo-random choice for select case branches makes it
   230  			// practically impossible for it to fail.
   231  			`timeout\.func1\(.* \[chan send\]`,
   232  		),
   233  	}
   234  
   235  	// GoKer tests from "GoBench: A Benchmark Suite of Real-World Go Concurrency Bugs".
   236  	// Refer to testdata/testgoroutineleakprofile/goker/README.md.
   237  	//
   238  	// This list is curated for tests that are not excessively flaky.
   239  	// Some tests are also excluded because they are redundant.
   240  	//
   241  	// TODO(vsaioc): Some of these might be removable (their patterns may overlap).
   242  	gokerTestCases := []testCase{
   243  		makeFlakyTest("Cockroach584",
   244  			`Cockroach584\.func2\(.* \[sync\.Mutex\.Lock\]`,
   245  		),
   246  		makeFlakyTest("Cockroach1055",
   247  			`Cockroach1055\.func2\(.* \[chan receive\]`,
   248  			`Cockroach1055\.func2\.2\(.* \[sync\.WaitGroup\.Wait\]`,
   249  			`Cockroach1055\.func2\.1\(.* \[chan receive\]`,
   250  			`Cockroach1055\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   251  		),
   252  		makeFlakyTest("Cockroach1462",
   253  			`\(\*Stopper_cockroach1462\)\.RunWorker\.func1\(.* \[chan send\]`,
   254  			`Cockroach1462\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   255  		),
   256  		makeFlakyTest("Cockroach2448",
   257  			`\(\*Store_cockroach2448\)\.processRaft\(.* \[select\]`,
   258  			`\(\*state_cockroach2448\)\.start\(.* \[select\]`,
   259  		),
   260  		makeFlakyTest("Cockroach3710",
   261  			`\(\*Store_cockroach3710\)\.ForceRaftLogScanAndProcess\(.* \[sync\.RWMutex\.RLock\]`,
   262  			`\(\*Store_cockroach3710\)\.processRaft\.func1\(.* \[sync\.RWMutex\.Lock\]`,
   263  		),
   264  		makeFlakyTest("Cockroach6181",
   265  			`testRangeCacheCoalescedRequests_cockroach6181\(.* \[sync\.WaitGroup\.Wait\]`,
   266  			`testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.(RW)?Mutex\.Lock\]`,
   267  			`testRangeCacheCoalescedRequests_cockroach6181\.func1\.1\(.* \[sync\.RWMutex\.RLock\]`,
   268  		),
   269  		makeTest("Cockroach7504",
   270  			`Cockroach7504\.func2\.1.* \[sync\.Mutex\.Lock\]`,
   271  			`Cockroach7504\.func2\.2.* \[sync\.Mutex\.Lock\]`,
   272  		),
   273  		makeFlakyTest("Cockroach9935",
   274  			`\(\*loggingT_cockroach9935\)\.outputLogEntry\(.* \[sync\.Mutex\.Lock\]`,
   275  		),
   276  		makeFlakyTest("Cockroach10214",
   277  			`\(*Store_cockroach10214\)\.sendQueuedHeartbeats\(.* \[sync\.Mutex\.Lock\]`,
   278  			`\(*Replica_cockroach10214\)\.tick\(.* \[sync\.Mutex\.Lock\]`,
   279  		),
   280  		makeFlakyTest("Cockroach10790",
   281  			`\(\*Replica_cockroach10790\)\.beginCmds\.func1\(.* \[chan receive\]`,
   282  		),
   283  		makeTest("Cockroach13197",
   284  			`\(\*Tx_cockroach13197\)\.awaitDone\(.* \[chan receive\]`,
   285  		),
   286  		makeTest("Cockroach13755",
   287  			`\(\*Rows_cockroach13755\)\.awaitDone\(.* \[chan receive\]`,
   288  		),
   289  		makeFlakyTest("Cockroach16167",
   290  			`Cockroach16167\.func2\(.* \[sync\.RWMutex\.RLock\]`,
   291  			`\(\*Executor_cockroach16167\)\.Start\(.* \[sync\.RWMutex\.Lock\]`,
   292  		),
   293  		makeFlakyTest("Cockroach18101",
   294  			`restore_cockroach18101\.func1\(.* \[chan send\]`,
   295  		),
   296  		makeTest("Cockroach24808",
   297  			`Cockroach24808\.func2\(.* \[chan send\]`,
   298  		),
   299  		makeTest("Cockroach25456",
   300  			`Cockroach25456\.func2\(.* \[chan receive\]`,
   301  		),
   302  		makeTest("Cockroach35073",
   303  			`Cockroach35073\.func2.1\(.* \[chan send\]`,
   304  			`Cockroach35073\.func2\(.* \[chan send\]`,
   305  		),
   306  		makeTest("Cockroach35931",
   307  			`Cockroach35931\.func2\(.* \[chan send\]`,
   308  		),
   309  		makeTest("Etcd5509",
   310  			`Etcd5509\.func2\(.* \[sync\.RWMutex\.Lock\]`,
   311  		),
   312  		makeTest("Etcd6708",
   313  			`Etcd6708\.func2\(.* \[sync\.RWMutex\.RLock\]`,
   314  		),
   315  		makeFlakyTest("Etcd6857",
   316  			`\(\*node_etcd6857\)\.Status\(.* \[chan send\]`,
   317  		),
   318  		makeFlakyTest("Etcd6873",
   319  			`\(\*watchBroadcasts_etcd6873\)\.stop\(.* \[chan receive\]`,
   320  			`newWatchBroadcasts_etcd6873\.func1\(.* \[sync\.Mutex\.Lock\]`,
   321  		),
   322  		makeFlakyTest("Etcd7492",
   323  			`Etcd7492\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   324  			`Etcd7492\.func2\.1\(.* \[chan send\]`,
   325  			`\(\*simpleTokenTTLKeeper_etcd7492\)\.run\(.* \[sync\.Mutex\.Lock\]`,
   326  		),
   327  		makeFlakyTest("Etcd7902",
   328  			`doRounds_etcd7902\.func1\(.* \[chan receive\]`,
   329  			`doRounds_etcd7902\.func1\(.* \[sync\.Mutex\.Lock\]`,
   330  			`runElectionFunc_etcd7902\(.* \[sync\.WaitGroup\.Wait\]`,
   331  		),
   332  		makeTest("Etcd10492",
   333  			`Etcd10492\.func2\(.* \[sync\.Mutex\.Lock\]`,
   334  		),
   335  		makeTest("Grpc660",
   336  			`\(\*benchmarkClient_grpc660\)\.doCloseLoopUnary\.func1\(.* \[chan send\]`,
   337  		),
   338  		makeFlakyTest("Grpc795",
   339  			`\(\*Server_grpc795\)\.Serve\(.* \[sync\.Mutex\.Lock\]`,
   340  			`testServerGracefulStopIdempotent_grpc795\(.* \[sync\.Mutex\.Lock\]`,
   341  		),
   342  		makeTest("Grpc862",
   343  			`DialContext_grpc862\.func2\(.* \[chan receive\]`),
   344  		makeTest("Grpc1275",
   345  			`testInflightStreamClosing_grpc1275\.func1\(.* \[chan receive\]`),
   346  		makeTest("Grpc1424",
   347  			`DialContext_grpc1424\.func1\(.* \[chan receive\]`),
   348  		makeFlakyTest("Grpc1460",
   349  			`\(\*http2Client_grpc1460\)\.keepalive\(.* \[chan receive\]`,
   350  			`\(\*http2Client_grpc1460\)\.NewStream\(.* \[sync\.Mutex\.Lock\]`,
   351  		),
   352  		makeFlakyTest("Grpc3017",
   353  			// grpc/3017 involves a goroutine leak that also simultaneously engages many GC assists.
   354  			`Grpc3017\.func2\(.* \[chan receive\]`,
   355  			`Grpc3017\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   356  			`\(\*lbCacheClientConn_grpc3017\)\.RemoveSubConn\.func1\(.* \[sync\.Mutex\.Lock\]`,
   357  		),
   358  		makeFlakyTest("Hugo3251",
   359  			`Hugo3251\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   360  			`Hugo3251\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   361  			`Hugo3251\.func2\.1\(.* \[sync\.RWMutex\.RLock\]`,
   362  		),
   363  		makeFlakyTest("Hugo5379",
   364  			`\(\*Page_hugo5379\)\.initContent\.func1\.1\(.* \[sync\.Mutex\.Lock\]`,
   365  			`pageRenderer_hugo5379\(.* \[sync\.Mutex\.Lock\]`,
   366  			`Hugo5379\.func2\(.* \[sync\.WaitGroup\.Wait\]`,
   367  		),
   368  		makeFlakyTest("Istio16224",
   369  			`Istio16224\.func2\(.* \[sync\.Mutex\.Lock\]`,
   370  			`\(\*controller_istio16224\)\.Run\(.* \[chan send\]`,
   371  			`\(\*controller_istio16224\)\.Run\(.* \[chan receive\]`,
   372  		),
   373  		makeFlakyTest("Istio17860",
   374  			`\(\*agent_istio17860\)\.runWait\(.* \[chan send\]`,
   375  		),
   376  		makeFlakyTest("Istio18454",
   377  			`\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan receive\]`,
   378  			`\(\*Worker_istio18454\)\.Start\.func1\(.* \[chan send\]`,
   379  		),
   380  		// NOTE(vsaioc):
   381  		// Kubernetes/1321 is excluded due to a race condition in the original program
   382  		// that may, in extremely rare cases, lead to nil pointer dereference crashes.
   383  		// (Reproducible even with regular GC). Only kept here for posterity.
   384  		//
   385  		// makeTest(testCase{name: "Kubernetes1321"},
   386  		// 	`NewMux_kubernetes1321\.gowrap1\(.* \[chan send\]`,
   387  		// 	`testMuxWatcherClose_kubernetes1321\(.* \[sync\.Mutex\.Lock\]`),
   388  		makeTest("Kubernetes5316",
   389  			`finishRequest_kubernetes5316\.func1\(.* \[chan send\]`,
   390  		),
   391  		makeFlakyTest("Kubernetes6632",
   392  			`\(\*idleAwareFramer_kubernetes6632\)\.monitor\(.* \[sync\.Mutex\.Lock\]`,
   393  			`\(\*idleAwareFramer_kubernetes6632\)\.WriteFrame\(.* \[chan send\]`,
   394  		),
   395  		makeFlakyTest("Kubernetes10182",
   396  			`\(\*statusManager_kubernetes10182\)\.Start\.func1\(.* \[sync\.Mutex\.Lock\]`,
   397  			`\(\*statusManager_kubernetes10182\)\.SetPodStatus\(.* \[chan send\]`,
   398  		),
   399  		makeFlakyTest("Kubernetes11298",
   400  			`After_kubernetes11298\.func1\(.* \[chan receive\]`,
   401  			`After_kubernetes11298\.func1\(.* \[sync\.Cond\.Wait\]`,
   402  			`Kubernetes11298\.func2\(.* \[chan receive\]`,
   403  		),
   404  		makeFlakyTest("Kubernetes13135",
   405  			`Util_kubernetes13135\(.* \[sync\.Mutex\.Lock\]`,
   406  			`\(\*WatchCache_kubernetes13135\)\.Add\(.* \[sync\.Mutex\.Lock\]`,
   407  		),
   408  		makeTest("Kubernetes25331",
   409  			`\(\*watchChan_kubernetes25331\)\.run\(.* \[chan send\]`,
   410  		),
   411  		makeFlakyTest("Kubernetes26980",
   412  			`Kubernetes26980\.func2\(.* \[chan receive\]`,
   413  			`Kubernetes26980\.func2\.1\(.* \[sync\.Mutex\.Lock\]`,
   414  			`\(\*processorListener_kubernetes26980\)\.pop\(.* \[chan receive\]`,
   415  		),
   416  		makeFlakyTest("Kubernetes30872",
   417  			`\(\*DelayingDeliverer_kubernetes30872\)\.StartWithHandler\.func1\(.* \[sync\.Mutex\.Lock\]`,
   418  			`\(\*Controller_kubernetes30872\)\.Run\(.* \[sync\.Mutex\.Lock\]`,
   419  			`\(\*NamespaceController_kubernetes30872\)\.Run\.func1\(.* \[sync\.Mutex\.Lock\]`,
   420  		),
   421  		makeTest("Kubernetes38669",
   422  			`\(\*cacheWatcher_kubernetes38669\)\.process\(.* \[chan send\]`,
   423  		),
   424  		makeFlakyTest("Kubernetes58107",
   425  			`\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.Cond\.Wait\]`,
   426  			`\(\*ResourceQuotaController_kubernetes58107\)\.worker\(.* \[sync\.RWMutex\.RLock\]`,
   427  			`\(\*ResourceQuotaController_kubernetes58107\)\.Sync\(.* \[sync\.RWMutex\.Lock\]`,
   428  		),
   429  		makeFlakyTest("Kubernetes62464",
   430  			`\(\*manager_kubernetes62464\)\.reconcileState\(.* \[sync\.RWMutex\.RLock\]`,
   431  			`\(\*staticPolicy_kubernetes62464\)\.RemoveContainer\(.* \[sync\.(RW)?Mutex\.Lock\]`,
   432  		),
   433  		makeFlakyTest("Kubernetes70277",
   434  			`Kubernetes70277\.func2\(.* \[chan receive\]`,
   435  		),
   436  		makeFlakyTest("Moby4951",
   437  			`\(\*DeviceSet_moby4951\)\.DeleteDevice\(.* \[sync\.Mutex\.Lock\]`,
   438  		),
   439  		makeTest("Moby7559",
   440  			`\(\*UDPProxy_moby7559\)\.Run\(.* \[sync\.Mutex\.Lock\]`,
   441  		),
   442  		makeTest("Moby17176",
   443  			`testDevmapperLockReleasedDeviceDeletion_moby17176\.func1\(.* \[sync\.Mutex\.Lock\]`,
   444  		),
   445  		makeFlakyTest("Moby21233",
   446  			`\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[chan send\]`,
   447  			`\(\*Transfer_moby21233\)\.Watch\.func1\(.* \[select\]`,
   448  			`testTransfer_moby21233\(.* \[chan receive\]`,
   449  		),
   450  		makeTest("Moby25348",
   451  			`\(\*Manager_moby25348\)\.init\(.* \[sync\.WaitGroup\.Wait\]`,
   452  		),
   453  		makeFlakyTest("Moby27782",
   454  			`\(\*JSONFileLogger_moby27782\)\.readLogs\(.* \[sync\.Cond\.Wait\]`,
   455  			`\(\*Watcher_moby27782\)\.readEvents\(.* \[select\]`,
   456  		),
   457  		makeFlakyTest("Moby28462",
   458  			`monitor_moby28462\(.* \[sync\.Mutex\.Lock\]`,
   459  			`\(\*Daemon_moby28462\)\.StateChanged\(.* \[chan send\]`,
   460  		),
   461  		makeTest("Moby30408",
   462  			`Moby30408\.func2\(.* \[chan receive\]`,
   463  			`testActive_moby30408\.func1\(.* \[sync\.Cond\.Wait\]`,
   464  		),
   465  		makeFlakyTest("Moby33781",
   466  			`monitor_moby33781\.func1\(.* \[chan send\]`,
   467  		),
   468  		makeFlakyTest("Moby36114",
   469  			`\(\*serviceVM_moby36114\)\.hotAddVHDsAtStart\(.* \[sync\.Mutex\.Lock\]`,
   470  		),
   471  		makeFlakyTest("Serving2137",
   472  			`\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[chan send\]`,
   473  			`\(\*Breaker_serving2137\)\.concurrentRequest\.func1\(.* \[sync\.Mutex\.Lock\]`,
   474  			`Serving2137\.func2\(.* \[chan receive\]`,
   475  		),
   476  		makeTest("Syncthing4829",
   477  			`Syncthing4829\.func2\(.* \[sync\.RWMutex\.RLock\]`,
   478  		),
   479  		makeTest("Syncthing5795",
   480  			`\(\*rawConnection_syncthing5795\)\.dispatcherLoop\(.* \[chan receive\]`,
   481  			`Syncthing5795\.func2.* \[chan receive\]`,
   482  		),
   483  	}
   484  
   485  	// Combine all test cases into a single list.
   486  	testCases := append(microTests, stressTestCases...)
   487  	testCases = append(testCases, patternTestCases...)
   488  
   489  	// Test cases must not panic or cause fatal exceptions.
   490  	failStates := regexp.MustCompile(`fatal|panic`)
   491  
   492  	testApp := func(exepath string, testCases []testCase) {
   493  
   494  		// Build the test program once.
   495  		exe, err := buildTestProg(t, exepath)
   496  		if err != nil {
   497  			t.Fatal(fmt.Sprintf("building testgoroutineleakprofile failed: %v", err))
   498  		}
   499  
   500  		for _, tcase := range testCases {
   501  			t.Run(tcase.name, func(t *testing.T) {
   502  				t.Parallel()
   503  
   504  				cmdEnv := []string{
   505  					"GODEBUG=asyncpreemptoff=1",
   506  					"GOEXPERIMENT=greenteagc,goroutineleakprofile",
   507  				}
   508  
   509  				if tcase.simple {
   510  					// If the test is simple, set GOMAXPROCS=1 in order to better
   511  					// control the behavior of the scheduler.
   512  					cmdEnv = append(cmdEnv, "GOMAXPROCS=1")
   513  				}
   514  
   515  				var output string
   516  				for i := 0; i < tcase.repetitions; i++ {
   517  					// Run program for one repetition and get runOutput trace.
   518  					runOutput := runBuiltTestProg(t, exe, tcase.name, cmdEnv...)
   519  					if len(runOutput) == 0 {
   520  						t.Errorf("Test %s produced no output. Is the goroutine leak profile collected?", tcase.name)
   521  					}
   522  
   523  					// Zero tolerance policy for fatal exceptions or panics.
   524  					if failStates.MatchString(runOutput) {
   525  						t.Errorf("unexpected fatal exception or panic!\noutput:\n%s\n\n", runOutput)
   526  					}
   527  
   528  					output += runOutput + "\n\n"
   529  				}
   530  
   531  				// Extract all the goroutine leaks
   532  				foundLeaks := extractLeaks(output)
   533  
   534  				// If the test case was not expected to produce leaks, but some were reported,
   535  				// stop the test immediately. Zero tolerance policy for false positives.
   536  				if len(tcase.expectedLeaks)+len(tcase.flakyLeaks) == 0 && len(foundLeaks) > 0 {
   537  					t.Errorf("output:\n%s\n\ngoroutines leaks detected in case with no leaks", output)
   538  				}
   539  
   540  				unexpectedLeaks := make([]string, 0, len(foundLeaks))
   541  
   542  				// Parse every leak and check if it is expected (maybe as a flaky leak).
   543  			LEAKS:
   544  				for _, leak := range foundLeaks {
   545  					// Check if the leak is expected.
   546  					// If it is, check whether it has been encountered before.
   547  					var foundNew bool
   548  					var leakPattern *regexp.Regexp
   549  
   550  					for expectedLeak, ok := range tcase.expectedLeaks {
   551  						if expectedLeak.MatchString(leak) {
   552  							if !ok {
   553  								foundNew = true
   554  							}
   555  
   556  							leakPattern = expectedLeak
   557  							break
   558  						}
   559  					}
   560  
   561  					if foundNew {
   562  						// Only bother writing if we found a new leak.
   563  						tcase.expectedLeaks[leakPattern] = true
   564  					}
   565  
   566  					if leakPattern == nil {
   567  						// We are dealing with a leak not marked as expected.
   568  						// Check if it is a flaky leak.
   569  						for flakyLeak := range tcase.flakyLeaks {
   570  							if flakyLeak.MatchString(leak) {
   571  								// The leak is flaky. Carry on to the next line.
   572  								continue LEAKS
   573  							}
   574  						}
   575  
   576  						unexpectedLeaks = append(unexpectedLeaks, leak)
   577  					}
   578  				}
   579  
   580  				missingLeakStrs := make([]string, 0, len(tcase.expectedLeaks))
   581  				for expectedLeak, found := range tcase.expectedLeaks {
   582  					if !found {
   583  						missingLeakStrs = append(missingLeakStrs, expectedLeak.String())
   584  					}
   585  				}
   586  
   587  				var errors []error
   588  				if len(unexpectedLeaks) > 0 {
   589  					errors = append(errors, fmt.Errorf("unexpected goroutine leaks:\n%s\n", strings.Join(unexpectedLeaks, "\n")))
   590  				}
   591  				if len(missingLeakStrs) > 0 {
   592  					errors = append(errors, fmt.Errorf("missing expected leaks:\n%s\n", strings.Join(missingLeakStrs, ", ")))
   593  				}
   594  				if len(errors) > 0 {
   595  					t.Fatalf("Failed with the following errors:\n%s\n\noutput:\n%s", errors, output)
   596  				}
   597  			})
   598  		}
   599  	}
   600  
   601  	testApp("testgoroutineleakprofile", testCases)
   602  	testApp("testgoroutineleakprofile/goker", gokerTestCases)
   603  }
   604  

View as plain text