| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package sys |
| |
| // Copied from math/bits to avoid dependence. |
| |
// len8tab maps every byte value b to the minimum number of bits needed
// to represent b. Entry 0 is 0; every other entry is the 1-based
// position of the highest set bit of its index.
var len8tab = [256]uint8{
	// Indices 0x00-0x0f.
	0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
	// Indices 0x10-0x1f.
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
	// Indices 0x20-0x3f.
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
	// Indices 0x40-0x7f.
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
	// Indices 0x80-0xff.
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
}
| |
// ntz8tab maps every byte value b to the number of trailing zero bits
// in b. Entry 0 is 8 (all eight bits are zero); every odd index maps to 0.
var ntz8tab = [256]uint8{
	8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x00-0x0f
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x10-0x1f
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x20-0x2f
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x30-0x3f
	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x40-0x4f
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x50-0x5f
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x60-0x6f
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x70-0x7f
	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x80-0x8f
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x90-0x9f
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xa0-0xaf
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xb0-0xbf
	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xc0-0xcf
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xd0-0xdf
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xe0-0xef
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xf0-0xff
}
| |
| // len64 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| func Len64(x uint64) (n int) { |
| if x >= 1<<32 { |
| x >>= 32 |
| n = 32 |
| } |
| if x >= 1<<16 { |
| x >>= 16 |
| n += 16 |
| } |
| if x >= 1<<8 { |
| x >>= 8 |
| n += 8 |
| } |
| return n + int(len8tab[x]) |
| } |
| |
| // --- OnesCount --- |
| |
// Bit masks used by OnesCount64 to select alternating fields of
// 1, 2 and 4 bits respectively.
const (
	m0 = 0x5555555555555555 // 01010101 ...
	m1 = 0x3333333333333333 // 00110011 ...
	m2 = 0x0f0f0f0f0f0f0f0f // 00001111 ...
)

// OnesCount64 returns the number of one bits ("population count") in x.
func OnesCount64(x uint64) int {
	// Parallel summing of adjacent bit fields; see "Hacker's Delight",
	// Chap. 5: Counting Bits. Each step adds neighbouring fields of
	// width w into fields of width 2w:
	//
	//	1-bit fields -> 2-bit sums (mask m0)
	//	2-bit fields -> 4-bit sums (mask m1)
	//	4-bit fields -> 8-bit sums (mask m2)
	//
	// From the 4-bit step on, a field sum can no longer carry into its
	// neighbour, so a single mask after the addition is enough. Since
	// the total cannot exceed 64, an 8-bit field holds every remaining
	// partial sum and the folds by 8, 16 and 32 need no masking at all;
	// only the final result is trimmed to its low 7 bits.
	// Per "Hacker's Delight" the first step could be simplified further,
	// but that saves at best one instruction, so it is kept in the
	// regular form for clarity.
	pairs := x>>1&m0 + x&m0
	quads := pairs>>2&m1 + pairs&m1
	sum := (quads>>4 + quads) & m2
	sum += sum >> 8
	sum += sum >> 16
	sum += sum >> 32
	return int(sum) & (1<<7 - 1)
}
| |
// deBruijn64tab translates the six bits that end up at the top of the
// product computed in TrailingZeros64 into the index of the lowest set
// bit of the original value.
var deBruijn64tab = [64]byte{
	0, 1, 56, 2, 57, 49, 28, 3,
	61, 58, 42, 50, 38, 29, 17, 4,
	62, 47, 59, 36, 45, 43, 51, 22,
	53, 39, 33, 30, 24, 18, 12, 5,
	63, 55, 48, 27, 60, 41, 37, 16,
	46, 35, 44, 21, 52, 32, 23, 11,
	54, 26, 40, 15, 34, 20, 31, 10,
	25, 14, 19, 9, 13, 8, 7, 6,
}

// deBruijn64 is the 64-bit de Bruijn constant paired with deBruijn64tab.
const deBruijn64 = 0x03f79d71b4ca8b09

// TrailingZeros64 returns the number of trailing zero bits in x; the
// result is 64 for x == 0.
func TrailingZeros64(x uint64) int {
	if x == 0 {
		return 64
	}
	// If popcount is fast, replace code below with return popcount(^x & (x - 1)).
	//
	// x & -x keeps only the right-most set bit of x. With k the index of
	// that bit, the isolated value is two to the power of k, so
	// multiplying by it is the same as shifting left by k bits. All
	// six-bit consecutive substrings of the de Bruijn (64 bit) constant
	// are distinct, hence the six bits that land at the top of the
	// shifted constant identify k uniquely; the table maps them back.
	// (Knuth, volume 4, section 7.3.1)
	lowest := x & -x
	slot := (lowest * deBruijn64) >> (64 - 6)
	return int(deBruijn64tab[slot])
}
| |
| // LeadingZeros64 returns the number of leading zero bits in x; the result is 64 for x == 0. |
| func LeadingZeros64(x uint64) int { return 64 - Len64(x) } |
| |
| // LeadingZeros8 returns the number of leading zero bits in x; the result is 8 for x == 0. |
| func LeadingZeros8(x uint8) int { return 8 - Len8(x) } |
| |
| // TrailingZeros8 returns the number of trailing zero bits in x; the result is 8 for x == 0. |
| func TrailingZeros8(x uint8) int { |
| return int(ntz8tab[x]) |
| } |
| |
| // Len8 returns the minimum number of bits required to represent x; the result is 0 for x == 0. |
| func Len8(x uint8) int { |
| return int(len8tab[x]) |
| } |
| |
// Prefetch prefetches the data at memory address addr into the cache.
//
// AMD64: Produce PREFETCHT0 instruction
//
// ARM64: Produce PRFM instruction with PLDL1KEEP option
func Prefetch(addr uintptr) {
	// The Go body is deliberately empty.
	// NOTE(review): the instructions listed above are presumably emitted
	// by the compiler in place of the call; confirm against the toolchain.
}
| |
// PrefetchStreamed prefetches the data at memory address addr, hinting
// that the data is being streamed: it is likely to be accessed very
// soon, but only once. If possible, this will avoid polluting the cache.
//
// AMD64: Produce PREFETCHNTA instruction
//
// ARM64: Produce PRFM instruction with PLDL1STRM option
func PrefetchStreamed(addr uintptr) {
	// The Go body is deliberately empty.
	// NOTE(review): the instructions listed above are presumably emitted
	// by the compiler in place of the call; confirm against the toolchain.
}