1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Lowering of mul, div, and mod operations.
6 // Runs after prove, so that prove can analyze div and mod ops
7 // directly instead of these obscured expansions,
8 // but before decompose builtin, so that 32-bit systems
9 // can still lower 64-bit ops to 32-bit ones.
10 //
11 // See ../magic.go for a detailed description of these algorithms.
12 // See test/codegen/divmod.go for tests.
13
14 // Unsigned div and mod by power of 2 handled in generic.rules.
15 // (The equivalent unsigned right shift and mask are simple enough for prove to analyze.)
16
17 // Signed divide by power of 2.
18 // n / c = n >> log(c) if n >= 0
19 // = (n+c-1) >> log(c) if n < 0
20 // We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
21 (Div8 <t> n (Const8 [c])) && isPowerOfTwo(c) =>
22 (Rsh8x64
23 (Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [int64( 8-log8(c))])))
24 (Const64 <typ.UInt64> [int64(log8(c))]))
25 (Div16 <t> n (Const16 [c])) && isPowerOfTwo(c) =>
26 (Rsh16x64
27 (Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [int64(16-log16(c))])))
28 (Const64 <typ.UInt64> [int64(log16(c))]))
29 (Div32 <t> n (Const32 [c])) && isPowerOfTwo(c) =>
30 (Rsh32x64
31 (Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [int64(32-log32(c))])))
32 (Const64 <typ.UInt64> [int64(log32(c))]))
33 (Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) =>
34 (Rsh64x64
35 (Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [int64(64-log64(c))])))
36 (Const64 <typ.UInt64> [int64(log64(c))]))
37
38 // Divide, not a power of 2, by strength reduction to double-width multiply and shift.
39 //
40 // umagicN(c) computes m, s such that N-bit unsigned divide
41 // x/c = (x*((1<<N)+m))>>N>>s = ((x*m)>>N+x)>>s
42 // where the multiplies are unsigned.
43 // Note that the returned m is always N+1 bits; umagicN omits the high 1<<N bit.
44 // The difficult part is implementing the 2N+1-bit multiply,
45 // since in general we have only a 2N-bit multiply available.
46 //
47 // smagic(c) computes m, s such that N-bit signed divide
48 // x/c = (x*m)>>N>>s - bool2int(x < 0).
49 // Here m is an unsigned N-bit number but x is signed.
50 //
51 // In general the division cases are:
52 //
53 // 1. A signed divide where 2N ≤ the register size.
54 // This form can use the signed algorithm directly.
55 //
56 // 2. A signed divide where m is even.
57 // This form can use a signed double-width multiply with m/2,
58 // shifting by s-1.
59 //
60 // 3. A signed divide where m is odd.
61 // This form can use x*m = ((x*(m-2^N))>>N+x) with a signed multiply.
62 // Since intN(m) is m-2^N < 0, the product and x have different signs,
63 // so there can be no overflow on the addition.
64 //
65 // 4. An unsigned divide where we know x < 1<<(N-1).
66 // This form can use the signed algorithm without the bool2int fixup,
67 // and since we know the product is only 2N-1 bits, we can use an
68 // unsigned multiply to obtain the high N bits directly, regardless
69 // of whether m is odd or even.
70 //
71 // 5. An unsigned divide where 2N+1 ≤ the register size.
72 // This form uses the unsigned algorithm with an explicit (1<<N)+m.
73 //
74 // 6. An unsigned divide where the N+1-bit m is even.
75 // This form can use an N-bit m/2 instead and shift one less bit.
76 //
77 // 7. An unsigned divide where m is odd but c is even.
78 // This form can shift once and then divide by (c/2) instead.
79 // The magic number m for c is ⌈2^k/c⌉, so we can use
80 // (m+1)/2 = ⌈2^k/(c/2)⌉ instead.
81 //
82 // 8. An unsigned divide on systems with an avg instruction.
83 // We noted above that (x*((1<<N)+m))>>N>>s = ((x*m)>>N+x)>>s.
84 // Let hi = (x*m)>>N, so we want (hi+x) >> s = avg(hi, x) >> (s-1).
85 //
86 // 9. Unsigned 64-bit divide by 16-bit constant on 32-bit systems.
87 // Use long division with 16-bit digits.
88 //
89 // Note: All systems have Hmul and Avg except for wasm, and the
90 // wasm JITs may well apply all these optimizations already anyway,
91 // so it may be worth looking into avoiding this pass entirely on wasm
92 // and dropping all the useAvg useHmul uncertainty.
93
94 // Case 1. Signed divides where 2N ≤ register size.
95 (Div8 <t> x (Const8 [c])) && smagicOK8(c) =>
96 (Sub8 <t>
97 (Rsh32x64 <t>
98 (Mul32 <typ.UInt32> (SignExt8to32 x) (Const32 <typ.UInt32> [int32(smagic8(c).m)]))
99 (Const64 <typ.UInt64> [8 + smagic8(c).s]))
100 (Rsh32x64 <t> (SignExt8to32 x) (Const64 <typ.UInt64> [31])))
101 (Div16 <t> x (Const16 [c])) && smagicOK16(c) =>
102 (Sub16 <t>
103 (Rsh32x64 <t>
104 (Mul32 <typ.UInt32> (SignExt16to32 x) (Const32 <typ.UInt32> [int32(smagic16(c).m)]))
105 (Const64 <typ.UInt64> [16 + smagic16(c).s]))
106 (Rsh32x64 <t> (SignExt16to32 x) (Const64 <typ.UInt64> [31])))
107 (Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 8 =>
108 (Sub32 <t>
109 (Rsh64x64 <t>
110 (Mul64 <typ.UInt64> (SignExt32to64 x) (Const64 <typ.UInt64> [int64(smagic32(c).m)]))
111 (Const64 <typ.UInt64> [32 + smagic32(c).s]))
112 (Rsh64x64 <t> (SignExt32to64 x) (Const64 <typ.UInt64> [63])))
113
114 // Case 2. Signed divides where m is even.
115 (Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul =>
116 (Sub32 <t>
117 (Rsh32x64 <t>
118 (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)]))
119 (Const64 <typ.UInt64> [smagic32(c).s - 1]))
120 (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
121 (Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul =>
122 (Sub64 <t>
123 (Rsh64x64 <t>
124 (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)]))
125 (Const64 <typ.UInt64> [smagic64(c).s - 1]))
126 (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
127
128 // Case 3. Signed divides where m is odd.
129 (Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul =>
130 (Sub32 <t>
131 (Rsh32x64 <t>
132 (Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])))
133 (Const64 <typ.UInt64> [smagic32(c).s]))
134 (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
135 (Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul =>
136 (Sub64 <t>
137 (Rsh64x64 <t>
138 (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])))
139 (Const64 <typ.UInt64> [smagic64(c).s]))
140 (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
141
142 // Case 4. Unsigned divide where x < 1<<(N-1).
143 // Skip Div8u since case 5's handling is just as good.
144 (Div16u <t> x (Const16 [c])) && t.IsSigned() && smagicOK16(c) =>
145 (Rsh32Ux64 <t>
146 (Mul32 <typ.UInt32> (SignExt16to32 x) (Const32 <typ.UInt32> [int32(smagic16(c).m)]))
147 (Const64 <typ.UInt64> [16 + smagic16(c).s]))
148 (Div32u <t> x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 8 =>
149 (Rsh64Ux64 <t>
150 (Mul64 <typ.UInt64> (SignExt32to64 x) (Const64 <typ.UInt64> [int64(smagic32(c).m)]))
151 (Const64 <typ.UInt64> [32 + smagic32(c).s]))
152 (Div32u <t> x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 && config.useHmul =>
153 (Rsh32Ux64 <t>
154 (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))
155 (Const64 <typ.UInt64> [smagic32(c).s]))
156 (Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul =>
157 (Rsh64Ux64 <t>
158 (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))
159 (Const64 <typ.UInt64> [smagic64(c).s]))
160
161 // Case 5. Unsigned divide where 2N+1 ≤ register size.
162 (Div8u <t> x (Const8 [c])) && umagicOK8(c) =>
163 (Trunc32to8 <t>
164 (Rsh32Ux64 <typ.UInt32>
165 (Mul32 <typ.UInt32> (ZeroExt8to32 x) (Const32 <typ.UInt32> [int32(1<<8 + umagic8(c).m)]))
166 (Const64 <typ.UInt64> [8 + umagic8(c).s])))
167 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 8 =>
168 (Trunc64to16 <t>
169 (Rsh64Ux64 <typ.UInt64>
170 (Mul64 <typ.UInt64> (ZeroExt16to64 x) (Const64 <typ.UInt64> [int64(1<<16 + umagic16(c).m)]))
171 (Const64 <typ.UInt64> [16 + umagic16(c).s])))
172
173 // Case 6. Unsigned divide where m is even.
174 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && umagic16(c).m&1 == 0 =>
175 (Trunc32to16 <t>
176 (Rsh32Ux64 <typ.UInt32>
177 (Mul32 <typ.UInt32> (ZeroExt16to32 x) (Const32 <typ.UInt32> [int32(1<<15 + umagic16(c).m/2)]))
178 (Const64 <typ.UInt64> [16 + umagic16(c).s - 1])))
179 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 8 =>
180 (Trunc64to32 <t>
181 (Rsh64Ux64 <typ.UInt64>
182 (Mul64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [int64(1<<31 + umagic32(c).m/2)]))
183 (Const64 <typ.UInt64> [32 + umagic32(c).s - 1])))
184 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 && config.useHmul =>
185 (Rsh32Ux64 <t>
186 (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)]))
187 (Const64 <typ.UInt64> [umagic32(c).s - 1]))
188 (Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul =>
189 (Rsh64Ux64 <t>
190 (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)]))
191 (Const64 <typ.UInt64> [umagic64(c).s - 1]))
192
193 // Case 7. Unsigned divide where c is even.
194 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && c&1 == 0 =>
195 (Trunc32to16 <t>
196 (Rsh32Ux64 <typ.UInt32>
197 (Mul32 <typ.UInt32>
198 (Rsh32Ux64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [1]))
199 (Const32 <typ.UInt32> [int32(1<<15 + (umagic16(c).m+1)/2)]))
200 (Const64 <typ.UInt64> [16 + umagic16(c).s - 2])))
201 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && c&1 == 0 =>
202 (Trunc64to32 <t>
203 (Rsh64Ux64 <typ.UInt64>
204 (Mul64 <typ.UInt64>
205 (Rsh64Ux64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [1]))
206 (Const64 <typ.UInt64> [int64(1<<31 + (umagic32(c).m+1)/2)]))
207 (Const64 <typ.UInt64> [32 + umagic32(c).s - 2])))
208 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul =>
209 (Rsh32Ux64 <t>
210 (Hmul32u <typ.UInt32>
211 (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1]))
212 (Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)]))
213 (Const64 <typ.UInt64> [umagic32(c).s - 2]))
214 (Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul =>
215 (Rsh64Ux64 <t>
216 (Hmul64u <typ.UInt64>
217 (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1]))
218 (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)]))
219 (Const64 <typ.UInt64> [umagic64(c).s - 2]))
220
221 // Case 8. Unsigned divide on systems with avg.
222 (Div16u <t> x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && config.useAvg =>
223 (Trunc32to16 <t>
224 (Rsh32Ux64 <typ.UInt32>
225 (Avg32u
226 (Lsh32x64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [16]))
227 (Mul32 <typ.UInt32> (ZeroExt16to32 x) (Const32 <typ.UInt32> [int32(umagic16(c).m)])))
228 (Const64 <typ.UInt64> [16 + umagic16(c).s - 1])))
229 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && config.useAvg =>
230 (Trunc64to32 <t>
231 (Rsh64Ux64 <typ.UInt64>
232 (Avg64u
233 (Lsh64x64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [32]))
234 (Mul64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt32> [int64(umagic32(c).m)])))
235 (Const64 <typ.UInt64> [32 + umagic32(c).s - 1])))
236 (Div32u <t> x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul =>
237 (Rsh32Ux64 <t>
238 (Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)])))
239 (Const64 <typ.UInt64> [umagic32(c).s - 1]))
240 (Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul =>
241 (Rsh64Ux64 <t>
242 (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)])))
243 (Const64 <typ.UInt64> [umagic64(c).s - 1]))
244
View as plain text