// Copyright 2025 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Lowering of mul, div, and mod operations. // Runs after prove, so that prove can analyze div and mod ops // directly instead of these obscured expansions, // but before decompose builtin, so that 32-bit systems // can still lower 64-bit ops to 32-bit ones. // // See ../magic.go for a detailed description of these algorithms. // See test/codegen/divmod.go for tests. // Unsigned div and mod by power of 2 handled in generic.rules. // (The equivalent unsigned right shift and mask are simple enough for prove to analyze.) // Signed divide by power of 2. // n / c = n >> log(c) if n >= 0 // = (n+c-1) >> log(c) if n < 0 // We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned). (Div8 n (Const8 [c])) && isPowerOfTwo(c) => (Rsh8x64 (Add8 n (Rsh8Ux64 (Rsh8x64 n (Const64 [ 7])) (Const64 [int64( 8-log8(c))]))) (Const64 [int64(log8(c))])) (Div16 n (Const16 [c])) && isPowerOfTwo(c) => (Rsh16x64 (Add16 n (Rsh16Ux64 (Rsh16x64 n (Const64 [15])) (Const64 [int64(16-log16(c))]))) (Const64 [int64(log16(c))])) (Div32 n (Const32 [c])) && isPowerOfTwo(c) => (Rsh32x64 (Add32 n (Rsh32Ux64 (Rsh32x64 n (Const64 [31])) (Const64 [int64(32-log32(c))]))) (Const64 [int64(log32(c))])) (Div64 n (Const64 [c])) && isPowerOfTwo(c) => (Rsh64x64 (Add64 n (Rsh64Ux64 (Rsh64x64 n (Const64 [63])) (Const64 [int64(64-log64(c))]))) (Const64 [int64(log64(c))])) // Divide, not a power of 2, by strength reduction to double-width multiply and shift. // // umagicN(c) computes m, s such that N-bit unsigned divide // x/c = (x*((1<>N>>s = ((x*m)>>N+x)>>s // where the multiplies are unsigned. // Note that the returned m is always N+1 bits; umagicN omits the high 1<>N>>s - bool2int(x < 0). // Here m is an unsigned N-bit number but x is signed. // // In general the division cases are: // // 1. A signed divide where 2N ≤ the register size. // This form can use the signed algorithm directly. // // 2. A signed divide where m is even. // This form can use a signed double-width multiply with m/2, // shifting by s-1. // // 3. A signed divide where m is odd. // This form can use x*m = ((x*(m-2^N))>>N+x) with a signed multiply. // Since intN(m) is m-2^N < 0, the product and x have different signs, // so there can be no overflow on the addition. // // 4. An unsigned divide where we know x < 1<<(N-1). // This form can use the signed algorithm without the bool2int fixup, // and since we know the product is only 2N-1 bits, we can use an // unsigned multiply to obtain the high N bits directly, regardless // of whether m is odd or even. // // 5. An unsigned divide where 2N+1 ≤ the register size. // This form uses the unsigned algorithm with an explicit (1<>N>>s = ((x*m)>>N+x)>>s. // Let hi = (x*m)>>N, so we want (hi+x) >> s = avg(hi, x) >> (s-1). // // 9. Unsigned 64-bit divide by 16-bit constant on 32-bit systems. // Use long division with 16-bit digits. // // Note: All systems have Hmul and Avg except for wasm, and the // wasm JITs may well apply all these optimizations already anyway, // so it may be worth looking into avoiding this pass entirely on wasm // and dropping all the useAvg useHmul uncertainty. // Case 1. Signed divides where 2N ≤ register size. (Div8 x (Const8 [c])) && smagicOK8(c) => (Sub8 (Rsh32x64 (Mul32 (SignExt8to32 x) (Const32 [int32(smagic8(c).m)])) (Const64 [8 + smagic8(c).s])) (Rsh32x64 (SignExt8to32 x) (Const64 [31]))) (Div16 x (Const16 [c])) && smagicOK16(c) => (Sub16 (Rsh32x64 (Mul32 (SignExt16to32 x) (Const32 [int32(smagic16(c).m)])) (Const64 [16 + smagic16(c).s])) (Rsh32x64 (SignExt16to32 x) (Const64 [31]))) (Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 8 => (Sub32 (Rsh64x64 (Mul64 (SignExt32to64 x) (Const64 [int64(smagic32(c).m)])) (Const64 [32 + smagic32(c).s])) (Rsh64x64 (SignExt32to64 x) (Const64 [63]))) // Case 2. Signed divides where m is even. (Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul => (Sub32 (Rsh32x64 (Hmul32 x (Const32 [int32(smagic32(c).m/2)])) (Const64 [smagic32(c).s - 1])) (Rsh32x64 x (Const64 [31]))) (Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul => (Sub64 (Rsh64x64 (Hmul64 x (Const64 [int64(smagic64(c).m/2)])) (Const64 [smagic64(c).s - 1])) (Rsh64x64 x (Const64 [63]))) // Case 3. Signed divides where m is odd. (Div32 x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul => (Sub32 (Rsh32x64 (Add32 x (Hmul32 x (Const32 [int32(smagic32(c).m)]))) (Const64 [smagic32(c).s])) (Rsh32x64 x (Const64 [31]))) (Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul => (Sub64 (Rsh64x64 (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)]))) (Const64 [smagic64(c).s])) (Rsh64x64 x (Const64 [63]))) // Case 4. Unsigned divide where x < 1<<(N-1). // Skip Div8u since case 5's handling is just as good. (Div16u x (Const16 [c])) && t.IsSigned() && smagicOK16(c) => (Rsh32Ux64 (Mul32 (SignExt16to32 x) (Const32 [int32(smagic16(c).m)])) (Const64 [16 + smagic16(c).s])) (Div32u x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 8 => (Rsh64Ux64 (Mul64 (SignExt32to64 x) (Const64 [int64(smagic32(c).m)])) (Const64 [32 + smagic32(c).s])) (Div32u x (Const32 [c])) && t.IsSigned() && smagicOK32(c) && config.RegSize == 4 && config.useHmul => (Rsh32Ux64 (Hmul32u x (Const32 [int32(smagic32(c).m)])) (Const64 [smagic32(c).s])) (Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul => (Rsh64Ux64 (Hmul64u x (Const64 [int64(smagic64(c).m)])) (Const64 [smagic64(c).s])) // Case 5. Unsigned divide where 2N+1 ≤ register size. (Div8u x (Const8 [c])) && umagicOK8(c) => (Trunc32to8 (Rsh32Ux64 (Mul32 (ZeroExt8to32 x) (Const32 [int32(1<<8 + umagic8(c).m)])) (Const64 [8 + umagic8(c).s]))) (Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 8 => (Trunc64to16 (Rsh64Ux64 (Mul64 (ZeroExt16to64 x) (Const64 [int64(1<<16 + umagic16(c).m)])) (Const64 [16 + umagic16(c).s]))) // Case 6. Unsigned divide where m is even. (Div16u x (Const16 [c])) && umagicOK16(c) && umagic16(c).m&1 == 0 => (Trunc32to16 (Rsh32Ux64 (Mul32 (ZeroExt16to32 x) (Const32 [int32(1<<15 + umagic16(c).m/2)])) (Const64 [16 + umagic16(c).s - 1]))) (Div32u x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 8 => (Trunc64to32 (Rsh64Ux64 (Mul64 (ZeroExt32to64 x) (Const64 [int64(1<<31 + umagic32(c).m/2)])) (Const64 [32 + umagic32(c).s - 1]))) (Div32u x (Const32 [c])) && umagicOK32(c) && umagic32(c).m&1 == 0 && config.RegSize == 4 && config.useHmul => (Rsh32Ux64 (Hmul32u x (Const32 [int32(1<<31 + umagic32(c).m/2)])) (Const64 [umagic32(c).s - 1])) (Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul => (Rsh64Ux64 (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)])) (Const64 [umagic64(c).s - 1])) // Case 7. Unsigned divide where c is even. (Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && c&1 == 0 => (Trunc32to16 (Rsh32Ux64 (Mul32 (Rsh32Ux64 (ZeroExt16to32 x) (Const64 [1])) (Const32 [int32(1<<15 + (umagic16(c).m+1)/2)])) (Const64 [16 + umagic16(c).s - 2]))) (Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && c&1 == 0 => (Trunc64to32 (Rsh64Ux64 (Mul64 (Rsh64Ux64 (ZeroExt32to64 x) (Const64 [1])) (Const64 [int64(1<<31 + (umagic32(c).m+1)/2)])) (Const64 [32 + umagic32(c).s - 2]))) (Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul => (Rsh32Ux64 (Hmul32u (Rsh32Ux64 x (Const64 [1])) (Const32 [int32(1<<31 + (umagic32(c).m+1)/2)])) (Const64 [umagic32(c).s - 2])) (Div64u x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul => (Rsh64Ux64 (Hmul64u (Rsh64Ux64 x (Const64 [1])) (Const64 [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 [umagic64(c).s - 2])) // Case 8. Unsigned divide on systems with avg. (Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && config.useAvg => (Trunc32to16 (Rsh32Ux64 (Avg32u (Lsh32x64 (ZeroExt16to32 x) (Const64 [16])) (Mul32 (ZeroExt16to32 x) (Const32 [int32(umagic16(c).m)]))) (Const64 [16 + umagic16(c).s - 1]))) (Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && config.useAvg => (Trunc64to32 (Rsh64Ux64 (Avg64u (Lsh64x64 (ZeroExt32to64 x) (Const64 [32])) (Mul64 (ZeroExt32to64 x) (Const64 [int64(umagic32(c).m)]))) (Const64 [32 + umagic32(c).s - 1]))) (Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul => (Rsh32Ux64 (Avg32u x (Hmul32u x (Const32 [int32(umagic32(c).m)]))) (Const64 [umagic32(c).s - 1])) (Div64u x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul => (Rsh64Ux64 (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)]))) (Const64 [umagic64(c).s - 1]))