// src/archive/tar/strconv.go - go - Git at Google

 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package tar

 import (
 	"bytes"
 	"fmt"
 	"strconv"
 	"strings"
 	"time"
 )

 // hasNUL reports whether s contains at least one NUL (0x00) byte.
 func hasNUL(s string) bool {
 	return strings.IndexByte(s, 0x00) >= 0
 }

 // isASCII reports whether s is usable as an ASCII C-style string:
 // every byte must be below 0x80 and none may be NUL.
 func isASCII(s string) bool {
 	for i := 0; i < len(s); i++ {
 		if ch := s[i]; ch == 0x00 || ch >= 0x80 {
 			return false
 		}
 	}
 	return true
 }

 // toASCII returns s reduced to an ASCII C-style string.
 // The conversion is best effort: NUL bytes and anything outside the
 // 7-bit ASCII range are silently dropped. A string that is already
 // valid is returned unchanged.
 func toASCII(s string) string {
 	if isASCII(s) {
 		return s
 	}
 	kept := make([]byte, 0, len(s))
 	for i := 0; i < len(s); i++ {
 		// Every byte of a multi-byte UTF-8 sequence is >= 0x80, so filtering
 		// byte-wise discards whole non-ASCII characters.
 		if ch := s[i]; ch != 0x00 && ch < 0x80 {
 			kept = append(kept, ch)
 		}
 	}
 	return string(kept)
 }

 // parser is the receiver for the parse* helpers in this file.
 // Methods such as parseNumeric and parseOctal report failure by
 // setting err rather than by returning an error value.
 type parser struct {
 	err error // Last error seen
 }

 // formatter is the receiver for the format* helpers in this file.
 // formatString, formatNumeric, and formatOctal set err (to
 // ErrFieldTooLong) when a value does not fit in the destination.
 type formatter struct {
 	err error // Last error seen
 }

 // parseString interprets b as a C-style string: the result is the bytes
 // preceding the first NUL. When b contains no NUL, all of b is returned.
 func (*parser) parseString(b []byte) string {
 	end := bytes.IndexByte(b, 0)
 	if end < 0 {
 		end = len(b)
 	}
 	return string(b[:end])
 }

 // formatString copies s into b, NUL-terminating if possible.
 //
 // If s is longer than b, f.err is set to ErrFieldTooLong and only the
 // first len(b) bytes of s are written. A zero-length b is accepted: nothing
 // is written and only the error is recorded.
 func (f *formatter) formatString(b []byte, s string) {
 	truncated := len(s) > len(b)
 	if truncated {
 		f.err = ErrFieldTooLong
 	}
 	copy(b, s)
 	if len(s) < len(b) {
 		b[len(s)] = 0
 	}
 
 	// Some buggy readers treat regular files with a trailing slash
 	// in the V7 path field as a directory even though the full path
 	// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
 	// The len(b) > 0 guard keeps an empty destination from indexing b[-1].
 	if truncated && len(b) > 0 && b[len(b)-1] == '/' {
 		n := len(strings.TrimRight(s[:len(b)], "/"))
 		b[n] = 0 // Replace trailing slash with NUL terminator
 	}
 }

 // fitsInBase256 reports whether x is representable in an n-byte base-256
 // field. Base-256 needs no trailing NUL (unlike octal), so every one of
 // the n bytes carries data.
 //
 // Strict GNU binary mode is assumed: the leading byte may only be 0x80 or
 // 0xff, which makes it act as the sign byte of a two's complement number
 // and leaves n-1 bytes for the magnitude.
 func fitsInBase256(n int, x int64) bool {
 	if n >= 9 {
 		return true // Eight payload bytes hold any int64
 	}
 	shift := uint(n-1) * 8
 	lo, hi := int64(-1)<<shift, int64(1)<<shift
 	return lo <= x && x < hi
 }

 // parseNumeric decodes a numeric header field that is stored either in
 // base-256 (binary) or in base-8 (octal) form.
 // The result may be negative. On malformed input or integer overflow,
 // p.err is set and 0 is returned.
 func (p *parser) parseNumeric(b []byte) int64 {
 	// Without the high bit in the first byte this is ordinary octal.
 	if len(b) == 0 || b[0]&0x80 == 0 {
 		return p.parseOctal(b)
 	}
 
 	// Base-256: the remaining bits form a big-endian two's complement
 	// number. Negative values are decoded via the identity
 	//	-a-1 == ^a
 	// by flipping every data byte, accumulating the result as unsigned,
 	// and complementing once more at the end.
 	negative := b[0]&0x40 != 0
 	var flip byte // 0x00 for non-negative input, 0xff for negative input
 	if negative {
 		flip = 0xff
 	}
 
 	var acc uint64
 	for idx := range b {
 		octet := b[idx] ^ flip
 		if idx == 0 {
 			octet &= 0x7f // Drop the binary-format marker bit
 		}
 		if acc>>56 != 0 {
 			p.err = ErrHeader // Shifting in another byte would overflow
 			return 0
 		}
 		acc = acc<<8 | uint64(octet)
 	}
 	if acc>>63 != 0 {
 		p.err = ErrHeader // Magnitude does not fit in an int64
 		return 0
 	}
 	if negative {
 		return ^int64(acc)
 	}
 	return int64(acc)
 }

 // formatNumeric writes x into b, preferring base-8 (octal) and falling
 // back to base-256 (binary) when octal cannot represent the value.
 // If neither encoding fits, an octal zero is written and f.err is set to
 // ErrFieldTooLong.
 func (f *formatter) formatNumeric(b []byte, x int64) {
 	switch {
 	case fitsInOctal(len(b), x):
 		f.formatOctal(b, x)
 	case fitsInBase256(len(b), x):
 		// Emit big-endian two's complement, least significant byte last.
 		for pos := len(b); pos > 0; pos-- {
 			b[pos-1] = byte(x)
 			x >>= 8
 		}
 		b[0] |= 0x80 // Highest bit indicates binary format
 	default:
 		f.formatOctal(b, 0) // Last resort, just write zero
 		f.err = ErrFieldTooLong
 	}
 }

 // parseOctal decodes an octal numeric field.
 // Surrounding spaces and NULs are ignored, and a field made up solely of
 // such padding yields 0. If the remaining text is not a valid octal
 // number, p.err is set to ErrHeader.
 func (p *parser) parseOctal(b []byte) int64 {
 	// Unused fields are NUL-filled and used ones may be padded with spaces
 	// or NULs on either side, so strip both ends before parsing.
 	field := bytes.Trim(b, " \x00")
 	if len(field) == 0 {
 		return 0
 	}
 
 	val, err := strconv.ParseUint(p.parseString(field), 8, 64)
 	if err != nil {
 		p.err = ErrHeader
 	}
 	return int64(val)
 }

 // formatOctal writes x into b as zero-padded octal text, leaving the
 // final byte free for a NUL terminator. If x does not fit, zero is
 // written instead and f.err is set to ErrFieldTooLong.
 func (f *formatter) formatOctal(b []byte, x int64) {
 	if !fitsInOctal(len(b), x) {
 		x = 0 // Last resort, just write zero
 		f.err = ErrFieldTooLong
 	}
 
 	digits := strconv.FormatInt(x, 8)
 	// Left-pad with zeros up to len(b)-1 characters.
 	if pad := len(b) - 1 - len(digits); pad > 0 {
 		digits = strings.Repeat("0", pad) + digits
 	}
 	f.formatString(b, digits)
 }

 // fitsInOctal reports whether x can be stored as octal text in a field of
 // n bytes, one of which is reserved for the NUL terminator. Negative
 // values never fit.
 func fitsInOctal(n int, x int64) bool {
 	if x < 0 {
 		return false
 	}
 	if n >= 22 {
 		return true // 21 octal digits cover every non-negative int64
 	}
 	return x < int64(1)<<(uint(n-1)*3)
 }

 // parsePAXTime parses a PAX timestamp of the form %d.%d (seconds, then an
 // optional fractional part). Negative timestamps are accepted; the PAX
 // specification permits them, although they are not always portable.
 // Fractional digits beyond nanosecond precision are discarded.
 // Malformed input yields ErrHeader.
 func parsePAXTime(s string) (time.Time, error) {
 	const nanoDigits = 9
 
 	whole, frac, _ := strings.Cut(s, ".")
 
 	sec, err := strconv.ParseInt(whole, 10, 64)
 	if err != nil {
 		return time.Time{}, ErrHeader
 	}
 	if frac == "" {
 		return time.Unix(sec, 0), nil // No sub-second values
 	}
 
 	// The fractional part must consist of decimal digits only.
 	for i := 0; i < len(frac); i++ {
 		if frac[i] < '0' || frac[i] > '9' {
 			return time.Time{}, ErrHeader
 		}
 	}
 
 	// Normalize to exactly nine digits so the text reads as nanoseconds.
 	if len(frac) >= nanoDigits {
 		frac = frac[:nanoDigits] // Right truncate
 	} else {
 		frac += strings.Repeat("0", nanoDigits-len(frac)) // Right pad
 	}
 	nsec, _ := strconv.ParseInt(frac, 10, 64) // Must succeed: nine digits
 
 	// The sign is taken from the text so that "-0.5" is handled too.
 	if strings.HasPrefix(whole, "-") {
 		nsec = -nsec
 	}
 	return time.Unix(sec, nsec), nil
 }

 // formatPAXTime renders ts in the PAX %d.%d timestamp form. The
 // fractional part is omitted when it is zero and otherwise printed
 // without trailing zeros. Negative timestamps are supported.
 func formatPAXTime(ts time.Time) (s string) {
 	sec, nsec := ts.Unix(), ts.Nanosecond()
 	if nsec == 0 {
 		return strconv.FormatInt(sec, 10)
 	}
 
 	// Nanosecond() is never negative, so a negative instant arrives as a
 	// more negative second plus a positive fraction. Convert that into a
 	// sign followed by magnitude: move one second over and take the
 	// complement of the fraction.
 	prefix := ""
 	if sec < 0 {
 		prefix = "-"
 		sec = -(sec + 1)
 		nsec = 1e9 - nsec
 	}
 	text := fmt.Sprintf("%s%d.%09d", prefix, sec, nsec)
 	return strings.TrimRight(text, "0")
 }

 // parsePAXRecord parses the first PAX record ("%d %s=%s\n") found in s.
 // On success it returns the key, the value, and whatever follows the
 // record as r. On failure it returns empty k and v, the unmodified input
 // as r, and ErrHeader.
 func parsePAXRecord(s string) (k, v, r string, err error) {
 	// The decimal size field runs up to the first space.
 	sizeField, body, found := strings.Cut(s, " ")
 	if !found {
 		return "", "", s, ErrHeader
 	}
 
 	size, convErr := strconv.ParseInt(sizeField, 10, 0) // Intentionally parse as native int
 	if convErr != nil || size < 5 || size > int64(len(s)) {
 		return "", "", s, ErrHeader
 	}
 
 	// size counts from the start of s; rebase it onto body, where it
 	// marks the end of the record. The record must end in a newline.
 	end := size - int64(len(sizeField)+1)
 	if end <= 0 || body[end-1] != '\n' {
 		return "", "", s, ErrHeader
 	}
 
 	// The first equals sign separates the key from the value.
 	key, val, hasEq := strings.Cut(body[:end-1], "=")
 	if !hasEq || !validPAXRecord(key, val) {
 		return "", "", s, ErrHeader
 	}
 	return key, val, body[end:], nil
 }

 // formatPAXRecord encodes k and v as a single PAX record
 // ("%d %s=%s\n"), where the leading number is the length of the whole
 // record including the number itself. It returns ErrHeader when the pair
 // is rejected by validPAXRecord.
 func formatPAXRecord(k, v string) (string, error) {
 	if !validPAXRecord(k, v) {
 		return "", ErrHeader
 	}
 
 	// Everything after the size field: ' ', key, '=', value, '\n'.
 	tail := " " + k + "=" + v + "\n"
 
 	// First guess: width of the size field derived from the tail alone.
 	total := len(tail)
 	total += len(strconv.Itoa(total))
 	rec := strconv.Itoa(total) + tail
 
 	// Including the size field may have added a digit; if so, redo the
 	// prefix using the real length.
 	if len(rec) != total {
 		rec = strconv.Itoa(len(rec)) + tail
 	}
 	return rec, nil
 }

 // validPAXRecord reports whether the key-value pair may be stored in a
 // record of the form:
 //	"%d %s=%s\n" % (size, key, value)
 //
 // Keys and values should be UTF-8, but the number of bad writers out
 // there forces us to be more liberal. The only checks are:
 //   - the key must be non-empty and must not contain '=';
 //   - for the PAX versions of the USTAR string fields (path, linkpath,
 //     uname, gname), the value must not contain NUL;
 //   - for every other key, the key must not contain NUL.
 func validPAXRecord(k, v string) bool {
 	if len(k) == 0 || strings.ContainsRune(k, '=') {
 		return false
 	}
 	if k == paxPath || k == paxLinkpath || k == paxUname || k == paxGname {
 		return !hasNUL(v)
 	}
 	return !hasNUL(k)
 }
	// Copyright 2016 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package tar

	import (
	"bytes"
	"fmt"
	"strconv"
	"strings"
	"time"
	)

	// hasNUL reports whether s contains at least one NUL (0x00) byte.
	func hasNUL(s string) bool {
		return strings.IndexByte(s, 0x00) >= 0
	}

	// isASCII reports whether s is usable as an ASCII C-style string:
	// every byte must be below 0x80 and none may be NUL.
	func isASCII(s string) bool {
		for i := 0; i < len(s); i++ {
			if ch := s[i]; ch == 0x00 || ch >= 0x80 {
				return false
			}
		}
		return true
	}

	// toASCII returns s reduced to an ASCII C-style string.
	// The conversion is best effort: NUL bytes and anything outside the
	// 7-bit ASCII range are silently dropped. A string that is already
	// valid is returned unchanged.
	func toASCII(s string) string {
		if isASCII(s) {
			return s
		}
		kept := make([]byte, 0, len(s))
		for i := 0; i < len(s); i++ {
			// Every byte of a multi-byte UTF-8 sequence is >= 0x80, so filtering
			// byte-wise discards whole non-ASCII characters.
			if ch := s[i]; ch != 0x00 && ch < 0x80 {
				kept = append(kept, ch)
			}
		}
		return string(kept)
	}

	// parser is the receiver for the parse* helpers in this file.
	// Methods such as parseNumeric and parseOctal report failure by
	// setting err rather than by returning an error value.
	type parser struct {
	err error // Last error seen
	}

	// formatter is the receiver for the format* helpers in this file.
	// formatString, formatNumeric, and formatOctal set err (to
	// ErrFieldTooLong) when a value does not fit in the destination.
	type formatter struct {
	err error // Last error seen
	}

	// parseString interprets b as a C-style string: the result is the bytes
	// preceding the first NUL. When b contains no NUL, all of b is returned.
	func (*parser) parseString(b []byte) string {
		end := bytes.IndexByte(b, 0)
		if end < 0 {
			end = len(b)
		}
		return string(b[:end])
	}

	// formatString copies s into b, NUL-terminating if possible.
	//
	// If s is longer than b, f.err is set to ErrFieldTooLong and only the
	// first len(b) bytes of s are written. A zero-length b is accepted: nothing
	// is written and only the error is recorded.
	func (f *formatter) formatString(b []byte, s string) {
		truncated := len(s) > len(b)
		if truncated {
			f.err = ErrFieldTooLong
		}
		copy(b, s)
		if len(s) < len(b) {
			b[len(s)] = 0
		}

		// Some buggy readers treat regular files with a trailing slash
		// in the V7 path field as a directory even though the full path
		// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
		// The len(b) > 0 guard keeps an empty destination from indexing b[-1].
		if truncated && len(b) > 0 && b[len(b)-1] == '/' {
			n := len(strings.TrimRight(s[:len(b)], "/"))
			b[n] = 0 // Replace trailing slash with NUL terminator
		}
	}

	// fitsInBase256 reports whether x is representable in an n-byte base-256
	// field. Base-256 needs no trailing NUL (unlike octal), so every one of
	// the n bytes carries data.
	//
	// Strict GNU binary mode is assumed: the leading byte may only be 0x80 or
	// 0xff, which makes it act as the sign byte of a two's complement number
	// and leaves n-1 bytes for the magnitude.
	func fitsInBase256(n int, x int64) bool {
		if n >= 9 {
			return true // Eight payload bytes hold any int64
		}
		shift := uint(n-1) * 8
		lo, hi := int64(-1)<<shift, int64(1)<<shift
		return lo <= x && x < hi
	}

	// parseNumeric parses the input as being encoded in either base-256 or octal.
	// This function may return negative numbers.
	// If parsing fails or an integer overflow occurs, err will be set.
	func (p *parser) parseNumeric(b []byte) int64 {
	// Check for base-256 (binary) format first.
	// If the first bit is set, then all following bits constitute a two's
	// complement encoded number in big-endian byte order.
	if len(b) > 0 && b[0]&0x80 != 0 {
	// Handling negative numbers relies on the following identity:
	// -a-1 == ^a
	//
	// If the number is negative, we use an inversion mask to invert the
	// data bytes and treat the value as an unsigned number.
	var inv byte // 0x00 if positive or zero, 0xff if negative
	if b[0]&0x40 != 0 {
	inv = 0xff
	}

	var x uint64
	for i, c := range b {
	c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
	if i == 0 {
	c &= 0x7f // Ignore signal bit in first byte
	}
	if (x >> 56) > 0 {
	p.err = ErrHeader // Integer overflow
	return 0
	}
	x = x<<8 \| uint64(c)
	}
	if (x >> 63) > 0 {
	p.err = ErrHeader // Integer overflow
	return 0
	}
	if inv == 0xff {
	return ^int64(x)
	}
	return int64(x)
	}

	// Normal case is base-8 (octal) format.
	return p.parseOctal(b)
	}

	// formatNumeric encodes x into b using base-8 (octal) encoding if possible.
	// Otherwise it will attempt to use base-256 (binary) encoding.
	func (f *formatter) formatNumeric(b []byte, x int64) {
	if fitsInOctal(len(b), x) {
	f.formatOctal(b, x)
	return
	}

	if fitsInBase256(len(b), x) {
	for i := len(b) - 1; i >= 0; i-- {
	b[i] = byte(x)
	x >>= 8
	}
	b[0] \|= 0x80 // Highest bit indicates binary format
	return
	}

	f.formatOctal(b, 0) // Last resort, just write zero
	f.err = ErrFieldTooLong
	}

	// parseOctal decodes an octal numeric field.
	// Surrounding spaces and NULs are ignored, and a field made up solely of
	// such padding yields 0. If the remaining text is not a valid octal
	// number, p.err is set to ErrHeader.
	func (p *parser) parseOctal(b []byte) int64 {
		// Unused fields are NUL-filled and used ones may be padded with spaces
		// or NULs on either side, so strip both ends before parsing.
		field := bytes.Trim(b, " \x00")
		if len(field) == 0 {
			return 0
		}

		val, err := strconv.ParseUint(p.parseString(field), 8, 64)
		if err != nil {
			p.err = ErrHeader
		}
		return int64(val)
	}

	// formatOctal writes x into b as zero-padded octal text, leaving the
	// final byte free for a NUL terminator. If x does not fit, zero is
	// written instead and f.err is set to ErrFieldTooLong.
	func (f *formatter) formatOctal(b []byte, x int64) {
		if !fitsInOctal(len(b), x) {
			x = 0 // Last resort, just write zero
			f.err = ErrFieldTooLong
		}

		digits := strconv.FormatInt(x, 8)
		// Left-pad with zeros up to len(b)-1 characters.
		if pad := len(b) - 1 - len(digits); pad > 0 {
			digits = strings.Repeat("0", pad) + digits
		}
		f.formatString(b, digits)
	}

	// fitsInOctal reports whether x can be stored as octal text in a field of
	// n bytes, one of which is reserved for the NUL terminator. Negative
	// values never fit.
	func fitsInOctal(n int, x int64) bool {
		if x < 0 {
			return false
		}
		if n >= 22 {
			return true // 21 octal digits cover every non-negative int64
		}
		return x < int64(1)<<(uint(n-1)*3)
	}

	// parsePAXTime parses a PAX timestamp of the form %d.%d (seconds, then an
	// optional fractional part). Negative timestamps are accepted; the PAX
	// specification permits them, although they are not always portable.
	// Fractional digits beyond nanosecond precision are discarded.
	// Malformed input yields ErrHeader.
	func parsePAXTime(s string) (time.Time, error) {
		const nanoDigits = 9

		whole, frac, _ := strings.Cut(s, ".")

		sec, err := strconv.ParseInt(whole, 10, 64)
		if err != nil {
			return time.Time{}, ErrHeader
		}
		if frac == "" {
			return time.Unix(sec, 0), nil // No sub-second values
		}

		// The fractional part must consist of decimal digits only.
		for i := 0; i < len(frac); i++ {
			if frac[i] < '0' || frac[i] > '9' {
				return time.Time{}, ErrHeader
			}
		}

		// Normalize to exactly nine digits so the text reads as nanoseconds.
		if len(frac) >= nanoDigits {
			frac = frac[:nanoDigits] // Right truncate
		} else {
			frac += strings.Repeat("0", nanoDigits-len(frac)) // Right pad
		}
		nsec, _ := strconv.ParseInt(frac, 10, 64) // Must succeed: nine digits

		// The sign is taken from the text so that "-0.5" is handled too.
		if strings.HasPrefix(whole, "-") {
			nsec = -nsec
		}
		return time.Unix(sec, nsec), nil
	}

	// formatPAXTime renders ts in the PAX %d.%d timestamp form. The
	// fractional part is omitted when it is zero and otherwise printed
	// without trailing zeros. Negative timestamps are supported.
	func formatPAXTime(ts time.Time) (s string) {
		sec, nsec := ts.Unix(), ts.Nanosecond()
		if nsec == 0 {
			return strconv.FormatInt(sec, 10)
		}

		// Nanosecond() is never negative, so a negative instant arrives as a
		// more negative second plus a positive fraction. Convert that into a
		// sign followed by magnitude: move one second over and take the
		// complement of the fraction.
		prefix := ""
		if sec < 0 {
			prefix = "-"
			sec = -(sec + 1)
			nsec = 1e9 - nsec
		}
		text := fmt.Sprintf("%s%d.%09d", prefix, sec, nsec)
		return strings.TrimRight(text, "0")
	}

	// parsePAXRecord parses the input PAX record string into a key-value pair.
	// If parsing is successful, it will slice off the currently read record and
	// return the remainder as r.
	func parsePAXRecord(s string) (k, v, r string, err error) {
	// The size field ends at the first space.
	nStr, rest, ok := strings.Cut(s, " ")
	if !ok {
	return "", "", s, ErrHeader
	}

	// Parse the first token as a decimal integer.
	n, perr := strconv.ParseInt(nStr, 10, 0) // Intentionally parse as native int
	if perr != nil \|\| n < 5 \|\| n > int64(len(s)) {
	return "", "", s, ErrHeader
	}
	n -= int64(len(nStr) + 1) // convert from index in s to index in rest
	if n <= 0 {
	return "", "", s, ErrHeader
	}

	// Extract everything between the space and the final newline.
	rec, nl, rem := rest[:n-1], rest[n-1:n], rest[n:]
	if nl != "\n" {
	return "", "", s, ErrHeader
	}

	// The first equals separates the key from the value.
	k, v, ok = strings.Cut(rec, "=")
	if !ok {
	return "", "", s, ErrHeader
	}

	if !validPAXRecord(k, v) {
	return "", "", s, ErrHeader
	}
	return k, v, rem, nil
	}

	// formatPAXRecord encodes k and v as a single PAX record
	// ("%d %s=%s\n"), where the leading number is the length of the whole
	// record including the number itself. It returns ErrHeader when the pair
	// is rejected by validPAXRecord.
	func formatPAXRecord(k, v string) (string, error) {
		if !validPAXRecord(k, v) {
			return "", ErrHeader
		}

		// Everything after the size field: ' ', key, '=', value, '\n'.
		tail := " " + k + "=" + v + "\n"

		// First guess: width of the size field derived from the tail alone.
		total := len(tail)
		total += len(strconv.Itoa(total))
		rec := strconv.Itoa(total) + tail

		// Including the size field may have added a digit; if so, redo the
		// prefix using the real length.
		if len(rec) != total {
			rec = strconv.Itoa(len(rec)) + tail
		}
		return rec, nil
	}

	// validPAXRecord reports whether the key-value pair is valid where each
	// record is formatted as:
	// "%d %s=%s\n" % (size, key, value)
	//
	// Keys and values should be UTF-8, but the number of bad writers out there
	// forces us to be a more liberal.
	// Thus, we only reject all keys with NUL, and only reject NULs in values
	// for the PAX version of the USTAR string fields.
	// The key must not contain an '=' character.
	func validPAXRecord(k, v string) bool {
	if k == "" \|\| strings.Contains(k, "=") {
	return false
	}
	switch k {
	case paxPath, paxLinkpath, paxUname, paxGname:
	return !hasNUL(v)
	default:
	return !hasNUL(k)
	}
	}