Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 1 | // Copyright 2016 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package crc32 |
| 6 | |
Ruixin(Peter) Bao | 9a3f22b | 2020-04-27 15:23:37 -0400 | [diff] [blame] | 7 | import "internal/cpu" |
| 8 | |
// Tuning parameters for the vector-facility code path.
const (
	// vxMinLen is the shortest buffer, in bytes, that is handed to the
	// vectorized routines; shorter buffers are processed entirely by
	// the slicing-by-8 fallback.
	vxMinLen = 64

	// vxAlignMask selects the low four bits of a length. Clearing
	// those bits rounds the length down to a multiple of 16 bytes.
	vxAlignMask = 1<<4 - 1
)
| 13 | |
Ruixin(Peter) Bao | 9a3f22b | 2020-04-27 15:23:37 -0400 | [diff] [blame] | 14 | // hasVX reports whether the machine has the z/Architecture |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 15 | // vector facility installed and enabled. It is initialized from cpu.S390X.HasVX; the arch* functions below either report it or panic when it is false. |
Ruixin(Peter) Bao | 9a3f22b | 2020-04-27 15:23:37 -0400 | [diff] [blame] | 16 | var hasVX = cpu.S390X.HasVX |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 17 | |
| 18 | // vectorizedCastagnoli implements CRC32 using vector instructions: it takes the running checksum crc and the bytes p and returns the updated checksum. |
| 19 | // It has no Go body; it is defined in crc32_s390x.s. archUpdateCastagnoli only passes it slices of at least vxMinLen bytes whose length is a multiple of 16. |
Russ Cox | 9839668 | 2022-01-30 20:13:43 -0500 | [diff] [blame] | 20 | // |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 21 | //go:noescape |
| 22 | func vectorizedCastagnoli(crc uint32, p []byte) uint32 |
| 23 | |
| 24 | // vectorizedIEEE implements CRC32 using vector instructions: it takes the running checksum crc and the bytes p and returns the updated checksum. |
| 25 | // It has no Go body; it is defined in crc32_s390x.s. archUpdateIEEE only passes it slices of at least vxMinLen bytes whose length is a multiple of 16. |
Russ Cox | 9839668 | 2022-01-30 20:13:43 -0500 | [diff] [blame] | 26 | // |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 27 | //go:noescape |
| 28 | func vectorizedIEEE(crc uint32, p []byte) uint32 |
| 29 | |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 30 | func archAvailableCastagnoli() bool { |
| 31 | return hasVX |
Radu Berinde | 90c3cf4 | 2016-08-27 13:17:30 -0400 | [diff] [blame] | 32 | } |
| 33 | |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 34 | var archCastagnoliTable8 *slicing8Table |
| 35 | |
| 36 | func archInitCastagnoli() { |
| 37 | if !hasVX { |
| 38 | panic("not available") |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 39 | } |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 40 | // We still use slicing-by-8 for small buffers. |
| 41 | archCastagnoliTable8 = slicingMakeTable(Castagnoli) |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 42 | } |
| 43 | |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 44 | // archUpdateCastagnoli calculates the checksum of p using |
| 45 | // vectorizedCastagnoli. |
| 46 | func archUpdateCastagnoli(crc uint32, p []byte) uint32 { |
| 47 | if !hasVX { |
| 48 | panic("not available") |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 49 | } |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 50 | // Use vectorized function if data length is above threshold. |
| 51 | if len(p) >= vxMinLen { |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 52 | aligned := len(p) & ^vxAlignMask |
| 53 | crc = vectorizedCastagnoli(crc, p[:aligned]) |
| 54 | p = p[aligned:] |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 55 | } |
| 56 | if len(p) == 0 { |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 57 | return crc |
| 58 | } |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 59 | return slicingUpdate(crc, archCastagnoliTable8, p) |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 60 | } |
| 61 | |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 62 | func archAvailableIEEE() bool { |
| 63 | return hasVX |
| 64 | } |
| 65 | |
| 66 | var archIeeeTable8 *slicing8Table |
| 67 | |
| 68 | func archInitIEEE() { |
| 69 | if !hasVX { |
| 70 | panic("not available") |
| 71 | } |
| 72 | // We still use slicing-by-8 for small buffers. |
| 73 | archIeeeTable8 = slicingMakeTable(IEEE) |
| 74 | } |
| 75 | |
| 76 | // archUpdateIEEE calculates the checksum of p using vectorizedIEEE. |
| 77 | func archUpdateIEEE(crc uint32, p []byte) uint32 { |
| 78 | if !hasVX { |
| 79 | panic("not available") |
| 80 | } |
| 81 | // Use vectorized function if data length is above threshold. |
| 82 | if len(p) >= vxMinLen { |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 83 | aligned := len(p) & ^vxAlignMask |
| 84 | crc = vectorizedIEEE(crc, p[:aligned]) |
| 85 | p = p[aligned:] |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 86 | } |
| 87 | if len(p) == 0 { |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 88 | return crc |
| 89 | } |
Radu Berinde | bdde101 | 2016-08-28 14:36:06 -0400 | [diff] [blame] | 90 | return slicingUpdate(crc, archIeeeTable8, p) |
Chris Zou | 5833d84 | 2016-04-18 19:30:17 -0400 | [diff] [blame] | 91 | } |