| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package sys |
| |
| // Copied from math/bits to avoid dependence. |
| |
// len8tab is a 256-entry lookup table: len8tab[i] is the minimum number of
// bits required to represent the byte value i (0 for i == 0).
const len8tab = "" +
	// Values 0x00-0x0f: lengths 0, 1, 2, 3 and 4.
	"\x00" + "\x01" + "\x02\x02" + "\x03\x03\x03\x03" + "\x04\x04\x04\x04\x04\x04\x04\x04" +
	// Values 0x10-0x1f: length 5.
	"\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05" +
	// Values 0x20-0x3f: length 6.
	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
	"\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06\x06" +
	// Values 0x40-0x7f: length 7.
	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
	"\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07" +
	// Values 0x80-0xff: length 8.
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08" +
	"\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
| |
| // Len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| // |
| // nosplit because this is used in src/runtime/histogram.go, which make run in sensitive contexts. |
| // |
| //go:nosplit |
| func Len64(x uint64) (n int) { |
| if x >= 1<<32 { |
| x >>= 32 |
| n = 32 |
| } |
| if x >= 1<<16 { |
| x >>= 16 |
| n += 16 |
| } |
| if x >= 1<<8 { |
| x >>= 8 |
| n += 8 |
| } |
| return n + int(len8tab[x]) |
| } |
| |
| // --- OnesCount --- |
| |
// Masks used by OnesCount64. They select the low 1, 2 and 4 bits of every
// 2-, 4- and 8-bit field of a 64-bit word, respectively.
const (
	m0 = 0x5555555555555555 // 01010101 ...
	m1 = 0x3333333333333333 // 00110011 ...
	m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
)

// OnesCount64 returns the number of one bits ("population count") in x.
func OnesCount64(x uint64) int {
	// Parallel summing of adjacent bit fields; see "Hacker's Delight",
	// Chap. 5: Counting Bits. Each step adds neighbouring fields pairwise,
	// doubling the field width:
	//
	//	1-bit fields -> 2-bit sums -> 4-bit sums -> 8-bit sums -> ...
	//
	// A mask is only needed while a field's sum could carry into the
	// neighbouring field. Since the total cannot exceed 64, it fits in
	// 8 bits, so the shifts by 8 and up are added without masking and the
	// unwanted high bits are discarded once at the end.
	// Per "Hacker's Delight" the first step can be simplified further, but
	// that saves at best one instruction, so it is left in the plain form.
	pairs := (x>>1)&m0 + x&m0
	nibbles := (pairs>>2)&m1 + pairs&m1
	sum := (nibbles + nibbles>>4) & m2 // one count (0..8) per byte
	sum += sum >> 8
	sum += sum >> 16
	sum += sum >> 32
	// The low byte now holds the full count; keep its 7 significant bits.
	return int(sum & 0x7f)
}
| |
| // LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0. |
| func LeadingZeros64(x uint64) int { return 64 - Len64(x) } |
| |
| // LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0. |
| func LeadingZeros8(x uint8) int { return 8 - Len8(x) } |
| |
| // Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| func Len8(x uint8) int { |
| return int(len8tab[x]) |
| } |
| |
// Prefetch prefetches data from memory addr to cache.
//
// The Go body is intentionally empty; the instruction produced depends on
// the target architecture:
//
// AMD64: Produce PREFETCHT0 instruction
//
// ARM64: Produce PRFM instruction with PLDL1KEEP option
func Prefetch(addr uintptr) {
}
| |
// PrefetchStreamed prefetches data from memory addr, with a hint that this
// data is being streamed: it is likely to be accessed very soon, but only
// once. If possible, this will avoid polluting the cache.
//
// The Go body is intentionally empty; the instruction produced depends on
// the target architecture:
//
// AMD64: Produce PREFETCHNTA instruction
//
// ARM64: Produce PRFM instruction with PLDL1STRM option
func PrefetchStreamed(addr uintptr) {
}