encoding/base64: Optimize EncodeToString and DecodeString.
benchmark old ns/op new ns/op delta
BenchmarkEncodeToString 31281 23821 -23.85%
BenchmarkDecodeString 156508 82254 -47.44%
benchmark old MB/s new MB/s speedup
BenchmarkEncodeToString 261.88 343.89 1.31x
BenchmarkDecodeString 69.80 132.81 1.90x
Change-Id: I115e0b18c3a6d5ef6bfdcb3f637644f02f290907
Reviewed-on: https://go-review.googlesource.com/8808
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go
index db4a409..3302fb4 100644
--- a/src/encoding/base64/base64.go
+++ b/src/encoding/base64/base64.go
@@ -6,10 +6,8 @@
package base64
import (
- "bytes"
"io"
"strconv"
- "strings"
)
/*
@@ -22,7 +20,7 @@
// (RFC 1421). RFC 4648 also defines an alternate encoding, which is
// the standard encoding with - and _ substituted for + and /.
type Encoding struct {
- encode string
+ encode [64]byte
decodeMap [256]byte
padChar rune
}
@@ -40,9 +38,14 @@
// The resulting Encoding uses the default padding character ('='),
// which may be changed or disabled via WithPadding.
func NewEncoding(encoder string) *Encoding {
+ if len(encoder) != 64 {
+ panic("encoding alphabet is not 64-bytes long")
+ }
+
e := new(Encoding)
- e.encode = encoder
e.padChar = StdPadding
+ copy(e.encode[:], encoder)
+
for i := 0; i < len(e.decodeMap); i++ {
e.decodeMap[i] = 0xFF
}
@@ -77,13 +80,6 @@
// This is the same as URLEncoding but omits padding characters.
var RawURLEncoding = URLEncoding.WithPadding(NoPadding)
-var removeNewlinesMapper = func(r rune) rune {
- if r == '\r' || r == '\n' {
- return -1
- }
- return r
-}
-
/*
* Encoder
*/
@@ -99,46 +95,45 @@
return
}
- for len(src) > 0 {
- var b0, b1, b2, b3 byte
+ di, si := 0, 0
+ n := (len(src) / 3) * 3
+ for si < n {
+ // Convert 3x 8bit source bytes into 4 bytes
+ val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
- // Unpack 4x 6-bit source blocks into a 4 byte
- // destination quantum
- switch len(src) {
- default:
- b3 = src[2] & 0x3F
- b2 = src[2] >> 6
- fallthrough
- case 2:
- b2 |= (src[1] << 2) & 0x3F
- b1 = src[1] >> 4
- fallthrough
- case 1:
- b1 |= (src[0] << 4) & 0x3F
- b0 = src[0] >> 2
+ dst[di+0] = enc.encode[val>>18&0x3F]
+ dst[di+1] = enc.encode[val>>12&0x3F]
+ dst[di+2] = enc.encode[val>>6&0x3F]
+ dst[di+3] = enc.encode[val&0x3F]
+
+ si += 3
+ di += 4
+ }
+
+ remain := len(src) - si
+ if remain == 0 {
+ return
+ }
+ // Add the remaining small block
+ val := uint(src[si+0]) << 16
+ if remain == 2 {
+ val |= uint(src[si+1]) << 8
+ }
+
+ dst[di+0] = enc.encode[val>>18&0x3F]
+ dst[di+1] = enc.encode[val>>12&0x3F]
+
+ switch remain {
+ case 2:
+ dst[di+2] = enc.encode[val>>6&0x3F]
+ if enc.padChar != NoPadding {
+ dst[di+3] = byte(enc.padChar)
}
-
- // Encode 6-bit blocks using the base64 alphabet
- dst[0] = enc.encode[b0]
- dst[1] = enc.encode[b1]
- if len(src) >= 3 {
- dst[2] = enc.encode[b2]
- dst[3] = enc.encode[b3]
- } else { // Final incomplete quantum
- if len(src) >= 2 {
- dst[2] = enc.encode[b2]
- }
- if enc.padChar != NoPadding {
- if len(src) < 2 {
- dst[2] = byte(enc.padChar)
- }
- dst[3] = byte(enc.padChar)
- }
- break
+ case 1:
+ if enc.padChar != NoPadding {
+ dst[di+2] = byte(enc.padChar)
+ dst[di+3] = byte(enc.padChar)
}
-
- src = src[3:]
- dst = dst[4:]
}
}
@@ -248,67 +243,83 @@
// decode is like Decode but returns an additional 'end' value, which
// indicates if end-of-message padding or a partial quantum was encountered
-// and thus any additional data is an error. This method assumes that src has been
-// stripped of all supported whitespace ('\r' and '\n').
+// and thus any additional data is an error.
func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) {
- olen := len(src)
- for len(src) > 0 && !end {
+ si := 0
+
+ // skip over newlines
+ for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
+ si++
+ }
+
+ for si < len(src) && !end {
// Decode quantum using the base64 alphabet
var dbuf [4]byte
dinc, dlen := 3, 4
for j := range dbuf {
- if len(src) == 0 {
+ if len(src) == si {
if enc.padChar != NoPadding || j < 2 {
- return n, false, CorruptInputError(olen - len(src) - j)
+ return n, false, CorruptInputError(si - j)
}
dinc, dlen, end = j-1, j, true
break
}
- in := src[0]
- src = src[1:]
+ in := src[si]
+
+ si++
+ // skip over newlines
+ for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
+ si++
+ }
+
if rune(in) == enc.padChar {
// We've reached the end and there's padding
switch j {
case 0, 1:
// incorrect padding
- return n, false, CorruptInputError(olen - len(src) - 1)
+ return n, false, CorruptInputError(si - 1)
case 2:
// "==" is expected, the first "=" is already consumed.
- if len(src) == 0 {
+ if si == len(src) {
// not enough padding
- return n, false, CorruptInputError(olen)
+ return n, false, CorruptInputError(len(src))
}
- if rune(src[0]) != enc.padChar {
+ if rune(src[si]) != enc.padChar {
// incorrect padding
- return n, false, CorruptInputError(olen - len(src) - 1)
+ return n, false, CorruptInputError(si - 1)
}
- src = src[1:]
+
+ si++
+ // skip over newlines
+ for si < len(src) && (src[si] == '\n' || src[si] == '\r') {
+ si++
+ }
}
- if len(src) > 0 {
+ if si < len(src) {
// trailing garbage
- err = CorruptInputError(olen - len(src))
+ err = CorruptInputError(si)
}
dinc, dlen, end = 3, j, true
break
}
dbuf[j] = enc.decodeMap[in]
if dbuf[j] == 0xFF {
- return n, false, CorruptInputError(olen - len(src) - 1)
+ return n, false, CorruptInputError(si - 1)
}
}
- // Pack 4x 6-bit source blocks into 3 byte destination
- // quantum
+ // Convert 4x 6bit source bytes into 3 bytes
+ val := uint(dbuf[0])<<18 | uint(dbuf[1])<<12 | uint(dbuf[2])<<6 | uint(dbuf[3])
switch dlen {
case 4:
- dst[2] = dbuf[2]<<6 | dbuf[3]
+ dst[2] = byte(val >> 0)
fallthrough
case 3:
- dst[1] = dbuf[1]<<4 | dbuf[2]>>2
+ dst[1] = byte(val >> 8)
fallthrough
case 2:
- dst[0] = dbuf[0]<<2 | dbuf[1]>>4
+ dst[0] = byte(val >> 16)
}
dst = dst[dinc:]
n += dlen - 1
@@ -323,14 +334,12 @@
// number of bytes successfully written and CorruptInputError.
// New line characters (\r and \n) are ignored.
func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
- src = bytes.Map(removeNewlinesMapper, src)
n, _, err = enc.decode(dst, src)
return
}
// DecodeString returns the bytes represented by the base64 string s.
func (enc *Encoding) DecodeString(s string) ([]byte, error) {
- s = strings.Map(removeNewlinesMapper, s)
dbuf := make([]byte, enc.DecodedLen(len(s)))
n, _, err := enc.decode(dbuf, []byte(s))
return dbuf[:n], err
@@ -359,6 +368,8 @@
return n, nil
}
+ // This code assumes that d.r strips supported whitespace ('\r' and '\n').
+
// Read a chunk.
nn := len(p) / 3 * 4
if nn < 4 {