text/scanner: don't liberally consume (invalid) floats or underbars

This is a follow-up on https://golang.org/cl/161199 which introduced
the new Go 2 number literals to text/scanner.

That change introduced a bug by allowing decimal and hexadecimal floats
to be consumed even if the scanner was not configured to accept floats.

This CL changes the code to not consume a radix dot '.' or exponent
unless the scanner is configured to accept floats.

This CL also introduces a new mode "AllowNumberbars" which controls
whether underbars '_' are permitted as digit separators in numbers
or not.

There is a possibility that we may need to refine text/scanner
further (e.g., the Float mode now includes hexadecimal floats
which it didn't recognize before). We're very early in the cycle,
so let's see how it goes.

RELNOTE=yes

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Fixes #30320.

Change-Id: I6481d314f0384e09ef6803ffad38dc529b1e89a3
Reviewed-on: https://go-review.googlesource.com/c/163079
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/api/except.txt b/api/except.txt
index 637be18..a608d57 100644
--- a/api/except.txt
+++ b/api/except.txt
@@ -457,3 +457,4 @@
 pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32
 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8
 pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8
+pkg text/scanner, const GoTokens = 1012
\ No newline at end of file
diff --git a/api/next.txt b/api/next.txt
index e69de29..aaea62d 100644
--- a/api/next.txt
+++ b/api/next.txt
@@ -0,0 +1,3 @@
+pkg text/scanner, const AllowNumberbars = 1024
+pkg text/scanner, const AllowNumberbars ideal-int
+pkg text/scanner, const GoTokens = 2036
diff --git a/src/text/scanner/scanner.go b/src/text/scanner/scanner.go
index 38c27f6..8db0ed2 100644
--- a/src/text/scanner/scanner.go
+++ b/src/text/scanner/scanner.go
@@ -59,15 +59,16 @@
 // "foo" is scanned as the token sequence '"' Ident '"'.
 //
 const (
-	ScanIdents     = 1 << -Ident
-	ScanInts       = 1 << -Int
-	ScanFloats     = 1 << -Float // includes Ints
-	ScanChars      = 1 << -Char
-	ScanStrings    = 1 << -String
-	ScanRawStrings = 1 << -RawString
-	ScanComments   = 1 << -Comment
-	SkipComments   = 1 << -skipComment // if set with ScanComments, comments become white space
-	GoTokens       = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
+	ScanIdents      = 1 << -Ident
+	ScanInts        = 1 << -Int
+	ScanFloats      = 1 << -Float // includes Ints and hexadecimal floats
+	ScanChars       = 1 << -Char
+	ScanStrings     = 1 << -String
+	ScanRawStrings  = 1 << -RawString
+	ScanComments    = 1 << -Comment
+	SkipComments    = 1 << -skipComment     // if set with ScanComments, comments become white space
+	AllowNumberbars = 1 << -allowNumberbars // if set, number literals may contain underbars as digit separators
+	GoTokens        = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments | AllowNumberbars
 )
 
 // The result of Scan is one of these tokens or a Unicode character.
@@ -80,7 +81,10 @@
 	String
 	RawString
 	Comment
+
+	// internal use only
 	skipComment
+	allowNumberbars
 )
 
 var tokenString = map[rune]string{
@@ -359,7 +363,8 @@
 func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
 func isHex(ch rune) bool     { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
 
-// digits accepts the sequence { digit | '_' } starting with ch0.
+// digits accepts the sequence { digit } (if AllowNumberbars is not set)
+// or { digit | '_' } (if AllowNumberbars is set), starting with ch0.
 // If base <= 10, digits accepts any decimal digit but records
 // the first invalid digit >= base in *invalid if *invalid == 0.
 // digits returns the first rune that is not part of the sequence
@@ -369,7 +374,7 @@
 	ch = ch0
 	if base <= 10 {
 		max := rune('0' + base)
-		for isDecimal(ch) || ch == '_' {
+		for isDecimal(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
 			ds := 1
 			if ch == '_' {
 				ds = 2
@@ -380,7 +385,7 @@
 			ch = s.next()
 		}
 	} else {
-		for isHex(ch) || ch == '_' {
+		for isHex(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
 			ds := 1
 			if ch == '_' {
 				ds = 2
@@ -392,7 +397,7 @@
 	return
 }
 
-func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
+func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
 	base := 10         // number base
 	prefix := rune(0)  // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
 	digsep := 0        // bit 0: digit present, bit 1: '_' present
@@ -401,7 +406,7 @@
 	// integer part
 	var tok rune
 	var ds int
-	if integerPart {
+	if !seenDot {
 		tok = Int
 		if ch == '0' {
 			ch = s.next()
@@ -422,17 +427,18 @@
 		}
 		ch, ds = s.digits(ch, base, &invalid)
 		digsep |= ds
+		if ch == '.' && s.Mode&ScanFloats != 0 {
+			ch = s.next()
+			seenDot = true
+		}
 	}
 
 	// fractional part
-	if !integerPart || ch == '.' {
+	if seenDot {
 		tok = Float
 		if prefix == 'o' || prefix == 'b' {
 			s.error("invalid radix point in " + litname(prefix))
 		}
-		if ch == '.' {
-			ch = s.next()
-		}
 		ch, ds = s.digits(ch, base, &invalid)
 		digsep |= ds
 	}
@@ -442,7 +448,7 @@
 	}
 
 	// exponent
-	if e := lower(ch); e == 'e' || e == 'p' {
+	if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
 		switch {
 		case e == 'e' && prefix != 0 && prefix != '0':
 			s.errorf("%q exponent requires decimal mantissa", ch)
@@ -682,7 +688,7 @@
 		}
 	case isDecimal(ch):
 		if s.Mode&(ScanInts|ScanFloats) != 0 {
-			tok, ch = s.scanNumber(ch, true)
+			tok, ch = s.scanNumber(ch, false)
 		} else {
 			ch = s.next()
 		}
@@ -705,7 +711,7 @@
 		case '.':
 			ch = s.next()
 			if isDecimal(ch) && s.Mode&ScanFloats != 0 {
-				tok, ch = s.scanNumber(ch, false)
+				tok, ch = s.scanNumber(ch, true)
 			}
 		case '/':
 			ch = s.next()
diff --git a/src/text/scanner/scanner_test.go b/src/text/scanner/scanner_test.go
index 58db8e1..6ae8fd9 100644
--- a/src/text/scanner/scanner_test.go
+++ b/src/text/scanner/scanner_test.go
@@ -877,3 +877,40 @@
 		}
 	}
 }
+
+func TestIssue30320(t *testing.T) {
+	for _, test := range []struct {
+		in, want string
+		mode     uint
+	}{
+		{"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
+		{"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
+		{"xxx1e0yyy", "1 0", ScanInts},
+		{"1_2", "1 2", ScanInts}, // don't consume _ as part of a number if not explicitly enabled
+		{"1_2", "1_2", ScanInts | AllowNumberbars},
+		{"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
+		{"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
+	} {
+		got := extractInts(test.in, test.mode)
+		if got != test.want {
+			t.Errorf("%q: got %q; want %q", test.in, got, test.want)
+		}
+	}
+}
+
+func extractInts(t string, mode uint) (res string) {
+	var s Scanner
+	s.Init(strings.NewReader(t))
+	s.Mode = mode
+	for {
+		switch tok := s.Scan(); tok {
+		case Int, Float:
+			if len(res) > 0 {
+				res += " "
+			}
+			res += s.TokenText()
+		case EOF:
+			return
+		}
+	}
+}