Source file src/cmd/compile/internal/base/startheap.go
// forEachGC invokes fn once right away and then again after each garbage
// collection cycle, for as long as fn keeps returning true. The first time
// fn returns false the chain ends and fn is never called again.
//
// The per-cycle callback is driven by a finalizer on a small heap object:
// each time the finalizer fires and fn asks to continue, the finalizer is
// attached to the same object again so that it fires after a later cycle.
func forEachGC(fn func() bool) {
	// The sentinel is 32 bytes, which is large enough to avoid the runtime's
	// tiny object allocator.
	type sentinel [32]byte

	var rearm func(*sentinel)
	rearm = func(obj *sentinel) {
		if !fn() {
			// fn is done; leave obj without a finalizer so it is simply collected.
			return
		}
		runtime.SetFinalizer(obj, rearm)
	}

	// Start the chain; this performs the first call to fn synchronously.
	rearm(new(sentinel))
}
If these 50 // are zero, defaults are used instead. 51 // 52 // NOTE: If you think this code would help startup time in your own 53 // application and you decide to use it, please benchmark first to see if it 54 // actually works for you (it may not: the Go compiler is not typical), and 55 // whatever the outcome, please leave a comment on bug #56546. This code 56 // uses supported interfaces, but depends more than we like on 57 // current+observed behavior of the garbage collector, so if many people need 58 // this feature, we should consider/propose a better way to accomplish it. 59 func AdjustStartingHeap(requestedHeapGoal, derateBreak, derateLoPct, derateHiPct uint64, logHeapTweaks bool) { 60 mp := runtime.GOMAXPROCS(0) 61 62 const ( 63 SHgoal = "/gc/heap/goal:bytes" 64 SHcount = "/gc/cycles/total:gc-cycles" 65 SHallocs = "/gc/heap/allocs:bytes" 66 SHfrees = "/gc/heap/frees:bytes" 67 ) 68 69 var sample = []metrics.Sample{{Name: SHgoal}, {Name: SHcount}, {Name: SHallocs}, {Name: SHfrees}} 70 71 const ( 72 SH_GOAL = 0 73 SH_COUNT = 1 74 SH_ALLOCS = 2 75 SH_FREES = 3 76 77 MB = 1_000_000 78 ) 79 80 // These particular magic numbers are designed to make the RSS footprint of -d=-gcstart=2000 81 // resemble that of GOMEMLIMIT=2000MiB GOGC=10000 when building large projects 82 // (e.g. the Go compiler itself, and the microsoft's typescript AST package), 83 // with the further restriction that these magic numbers did a good job of reducing user-cpu 84 // for builds at either gcstart=2000 or gcstart=128. 
85 // 86 // The benchmarking to obtain this was (a version of): 87 // 88 // for i in {1..50} ; do 89 // for what in std cmd/compile cmd/fix cmd/go github.com/microsoft/typescript-go/internal/ast ; do 90 // whatbase=`basename ${what}` 91 // for sh in 128 2000 ; do 92 // for br in 500 600 ; do 93 // for shlo in 65 70; do 94 // for shhi in 55 60 ; do 95 // benchcmd -n=2 ${whatbase} go build -a \ 96 // -gcflags=all=-d=gcstart=${sh},gcstartloderate=${shlo},gcstarthiderate=${shhi},gcstartbreak=${br} \ 97 // ${what} | tee -a startheap${sh}_${br}_${shhi}_${shlo}.bench 98 // done 99 // done 100 // done 101 // done 102 // done 103 // done 104 // 105 // benchcmd is "go install github.com/aclements/go-misc/benchcmd@latest" 106 107 if derateBreak == 0 { 108 derateBreak = 600 109 } 110 if derateLoPct == 0 { 111 derateLoPct = 70 112 } 113 if derateHiPct == 0 { 114 derateHiPct = 55 115 } 116 117 gogcDerate := func(myGogc uint64) uint64 { 118 if myGogc < derateBreak { 119 return (myGogc * derateLoPct) / 100 120 } 121 return (myGogc * derateHiPct) / 100 122 } 123 124 // Assumptions and observations of Go's garbage collector, as of Go 1.17-1.20: 125 126 // - the initial heap goal is 4MiB, by fiat. It is possible for Go to start 127 // with a heap as small as 512k, so this may change in the future. 128 129 // - except for the first heap goal, heap goal is a function of 130 // observed-live at the previous GC and current GOGC. After the first 131 // GC, adjusting GOGC immediately updates GOGC; before the first GC, 132 // adjusting GOGC does not modify goal (but the change takes effect after 133 // the first GC). 134 135 // - the before/after first GC behavior is not guaranteed anywhere, it's 136 // just behavior, and it's a bad idea to rely on it. 137 138 // - we don't know exactly when GC will run, even after we adjust GOGC; the 139 // first GC may not have happened yet, may have already happened, or may 140 // be currently in progress, and GCs can start for several reasons. 
141 142 // - forEachGC above will run the provided function at some delay after each 143 // GC's mark phase terminates; finalizers are run after marking as the 144 // spans containing finalizable objects are swept, driven by GC 145 // background activity and allocation demand. 146 147 // - "live at last GC" is not available through the current metrics 148 // interface. Instead, live is estimated by knowing the adjusted value of 149 // GOGC and the new heap goal following a GC (this requires knowing that 150 // at least one GC has occurred): 151 // estLive = 100 * newGoal / (100 + currentGogc) 152 // this new value of GOGC 153 // newGogc = 100*requestedHeapGoal/estLive - 100 154 // will result in the desired goal. The logging code checks that the 155 // resulting goal is correct. 156 157 // There's a small risk that the finalizer will be slow to run after a GC 158 // that expands the goal to a huge value, and that this will lead to 159 // out-of-memory. This doesn't seem to happen; in experiments on a variety 160 // of machines with a variety of extra loads to disrupt scheduling, the 161 // worst overshoot observed was 50% past requestedHeapGoal. 162 163 metrics.Read(sample) 164 for _, s := range sample { 165 if s.Value.Kind() == metrics.KindBad { 166 // Just return, a slightly slower compilation is a tolerable outcome. 167 if logHeapTweaks { 168 fmt.Fprintf(os.Stderr, "GCAdjust: Regret unexpected KindBad for metric %s\n", s.Name) 169 } 170 return 171 } 172 } 173 174 // Tinker with GOGC to make the heap grow rapidly at first. 175 currentGoal := sample[SH_GOAL].Value.Uint64() // Believe this will be 4MByte or less, perhaps 512k 176 myGogc := 100 * requestedHeapGoal / currentGoal 177 myGogc = gogcDerate(myGogc) 178 if myGogc <= 125 { 179 return 180 } 181 182 if logHeapTweaks { 183 sample := append([]metrics.Sample(nil), sample...) 
// avoid races with GC callback 184 AtExit(func() { 185 metrics.Read(sample) 186 goal := sample[SH_GOAL].Value.Uint64() 187 count := sample[SH_COUNT].Value.Uint64() 188 oldGogc := debug.SetGCPercent(100) 189 if oldGogc == 100 { 190 fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %dMB gogc %d count %d maxprocs %d\n", 191 goal/MB, oldGogc, count, mp) 192 } else { 193 inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64() 194 overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal) 195 fmt.Fprintf(os.Stderr, "GCAdjust: AtExit goal %dMB gogc %d count %d maxprocs %d overPct %d\n", 196 goal/MB, oldGogc, count, mp, overPct) 197 198 } 199 }) 200 } 201 202 originalGOGC := debug.SetGCPercent(int(myGogc)) 203 204 // forEachGC finalizers ought not overlap, but they could run in separate threads. 205 // This ought not matter, but just in case it bothers the/a race detector, 206 // use this mutex. 207 var forEachGCLock sync.Mutex 208 209 adjustFunc := func() bool { 210 211 forEachGCLock.Lock() 212 defer forEachGCLock.Unlock() 213 214 metrics.Read(sample) 215 goal := sample[SH_GOAL].Value.Uint64() 216 count := sample[SH_COUNT].Value.Uint64() 217 218 if goal <= requestedHeapGoal { // Stay the course 219 if logHeapTweaks { 220 fmt.Fprintf(os.Stderr, "GCAdjust: Reuse GOGC adjust, current goal %dMB, count is %d, current gogc %d\n", 221 goal/MB, count, myGogc) 222 } 223 return true 224 } 225 226 // Believe goal has been adjusted upwards, else it would be less-than-or-equal to requestedHeapGoal 227 calcLive := 100 * goal / (100 + myGogc) 228 229 if 2*calcLive < requestedHeapGoal { // calcLive can exceed requestedHeapGoal! 230 myGogc = 100*requestedHeapGoal/calcLive - 100 231 myGogc = gogcDerate(myGogc) 232 233 if myGogc > 125 { 234 // Not done growing the heap. 
235 oldGogc := debug.SetGCPercent(int(myGogc)) 236 237 if logHeapTweaks { 238 // Check that the new goal looks right 239 inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64() 240 metrics.Read(sample) 241 newGoal := sample[SH_GOAL].Value.Uint64() 242 pctOff := 100 * (int64(newGoal) - int64(requestedHeapGoal)) / int64(requestedHeapGoal) 243 // Check that the new goal is close to requested. 3% of make.bash fails this test. Why, TBD. 244 if pctOff < 2 { 245 fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %dMB, count is %d, gogc was %d, is now %d, calcLive %dMB pctOff %d\n", 246 goal/MB, count, oldGogc, myGogc, calcLive/MB, pctOff) 247 } else { 248 // The GC is being annoying and not giving us the goal that we requested, say more to help understand when/why. 249 fmt.Fprintf(os.Stderr, "GCAdjust: Retry GOGC adjust, current goal %dMB, count is %d, gogc was %d, is now %d, calcLive %dMB pctOff %d inUse %dMB\n", 250 goal/MB, count, oldGogc, myGogc, calcLive/MB, pctOff, inUse/MB) 251 } 252 } 253 return true 254 } 255 } 256 257 // In this case we're done boosting GOGC, set it to its original value and don't set a new finalizer. 258 oldGogc := debug.SetGCPercent(originalGOGC) 259 // inUse helps estimate how late the finalizer ran; at the instant the previous GC ended, 260 // it was (in theory) equal to the previous GC's heap goal. In a growing heap it is 261 // expected to grow to the new heap goal. 262 if logHeapTweaks { 263 inUse := sample[SH_ALLOCS].Value.Uint64() - sample[SH_FREES].Value.Uint64() 264 overPct := 100 * (int(inUse) - int(requestedHeapGoal)) / int(requestedHeapGoal) 265 fmt.Fprintf(os.Stderr, "GCAdjust: Reset GOGC adjust, old goal %dMB, count is %d, gogc was %d, gogc is now %d, calcLive %dMB inUse %dMB overPct %d\n", 266 goal/MB, count, oldGogc, originalGOGC, calcLive/MB, inUse/MB, overPct) 267 } 268 return false 269 } 270 271 forEachGC(adjustFunc) 272 } 273