cmd/compile/internal/syntax: permit /*line file:line:col*/ directives

R=go1.11

This implements parsing of /*line file:line*/ and /*line file:line:col*/
directives and also extends the optional column format to regular //line
directives, per #22662.

For a line directive to be recognized, its comment text must start with
the prefix "line " which is followed by one of the following:

:line
:line:col
filename:line
filename:line:col

with at least one : present. The line and col values must be unsigned
decimal integers; everything before is considered part of the filename.

Valid line directives are:

//line :123
//line :123:8
//line foo.go:123
//line C:foo.go:123	(filename is "C:foo.go")
//line C:foo.go:123:8	(filename is "C:foo.go")
/*line ::123*/		(filename is ":")

No matter the comment format, at the moment all directives act as if
they were in //line comments, and column information is ignored.
To be addressed in subsequent CLs.

For #22662.

Change-Id: I1a2dc54bacc94bc6cdedc5229ee13278971f314e
Reviewed-on: https://go-review.googlesource.com/86037
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go
index c8dfc96..6b52950 100644
--- a/src/cmd/compile/internal/syntax/parser.go
+++ b/src/cmd/compile/internal/syntax/parser.go
@@ -47,8 +47,9 @@
 			p.error_at(p.pos_at(line, col), msg)
 		},
 		func(line, col uint, text string) {
-			if strings.HasPrefix(text, "line ") {
-				p.updateBase(line, col+5, text[5:])
+			const prefix = "line "
+			if strings.HasPrefix(text, prefix) {
+				p.updateBase(line, col+uint(len(prefix)), text[len(prefix):])
 				return
 			}
 			if pragh != nil {
@@ -69,23 +70,54 @@
 const lineMax = 1<<24 - 1 // TODO(gri) this limit is defined for src.Pos - fix
 
 func (p *parser) updateBase(line, col uint, text string) {
-	// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
-	i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
-	if i < 0 {
+	i, n, ok := trailingDigits(text)
+	if i == 0 {
 		return // ignore (not a line directive)
 	}
-	nstr := text[i+1:]
-	n, err := strconv.Atoi(nstr)
-	if err != nil || n <= 0 || n > lineMax {
-		p.error_at(p.pos_at(line, col+uint(i+1)), "invalid line number: "+nstr)
+	// i > 0
+
+	if !ok {
+		// text has a suffix :xxx but xxx is not a number
+		p.error_at(p.pos_at(line, col+i), "invalid line number: "+text[i:])
 		return
 	}
-	filename := text[:i]
+
+	i2, n2, ok2 := trailingDigits(text[:i-1])
+	if ok2 {
+		//line filename:line:col
+		i, i2 = i2, i
+		n, n2 = n2, n
+		if n2 == 0 {
+			p.error_at(p.pos_at(line, col+i2), "invalid column number: "+text[i2:])
+			return
+		}
+		text = text[:i2-1] // lop off :col
+	}
+
+	if n == 0 || n > lineMax {
+		p.error_at(p.pos_at(line, col+i), "invalid line number: "+text[i:])
+		return
+	}
+
+	filename := text[:i-1] // lop off :line
 	absFilename := filename
 	if p.fileh != nil {
 		absFilename = p.fileh(filename)
 	}
-	p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n))
+
+	// TODO(gri) pass column n2 to NewLinePragmaBase
+	p.base = src.NewLinePragmaBase(src.MakePos(p.base.Pos().Base(), line, col), filename, absFilename, uint(n) /*uint(n2)*/)
+}
+
+func trailingDigits(text string) (uint, uint, bool) {
+	// Want to use LastIndexByte below but it's not defined in Go1.4 and bootstrap fails.
+	i := strings.LastIndex(text, ":") // look from right (Windows filenames may contain ':')
+	if i < 0 {
+		return 0, 0, false // no ":"
+	}
+	// i >= 0
+	n, err := strconv.ParseUint(text[i+1:], 10, 0)
+	return uint(i + 1), uint(n), err == nil
 }
 
 func (p *parser) got(tok token) bool {
diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go
index 684a842..592163b 100644
--- a/src/cmd/compile/internal/syntax/parser_test.go
+++ b/src/cmd/compile/internal/syntax/parser_test.go
@@ -182,25 +182,99 @@
 }
 
 func TestLineDirectives(t *testing.T) {
+	// valid line directives lead to a syntax error after them
+	const valid = "syntax error: package statement must be first"
+
 	for _, test := range []struct {
 		src, msg  string
 		filename  string
 		line, col uint // 0-based
 	}{
-		// test validity of //line directive
-		{`//line :`, "invalid line number: ", "", 0, 8},
-		{`//line :x`, "invalid line number: x", "", 0, 8},
-		{`//line foo :`, "invalid line number: ", "", 0, 12},
-		{`//line foo:123abc`, "invalid line number: 123abc", "", 0, 11},
-		{`/**///line foo:x`, "syntax error: package statement must be first", "", 0, 16}, //line directive not at start of line - ignored
-		{`//line foo:0`, "invalid line number: 0", "", 0, 11},
-		{fmt.Sprintf(`//line foo:%d`, lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), "", 0, 11},
+		// ignored //line directives
+		{"//\n", valid, "", 2 - linebase, 0},            // no directive
+		{"//line\n", valid, "", 2 - linebase, 0},        // missing colon
+		{"//line foo\n", valid, "", 2 - linebase, 0},    // missing colon
+		{"  //line foo:\n", valid, "", 2 - linebase, 0}, // not a line start
+		{"//  line foo:\n", valid, "", 2 - linebase, 0}, // space between // and line
 
-		// test effect of //line directive on (relative) position information
-		{"//line foo:123\n   foo", "syntax error: package statement must be first", "foo", 123 - linebase, 3},
-		{"//line foo:123\n//line bar:345\nfoo", "syntax error: package statement must be first", "bar", 345 - linebase, 0},
+		// invalid //line directives with one colon
+		{"//line :\n", "invalid line number: ", "", 0, 8},
+		{"//line :x\n", "invalid line number: x", "", 0, 8},
+		{"//line foo :\n", "invalid line number: ", "", 0, 12},
+		{"//line foo:x\n", "invalid line number: x", "", 0, 11},
+		{"//line foo:0\n", "invalid line number: 0", "", 0, 11},
+		{"//line foo:1 \n", "invalid line number: 1 ", "", 0, 11},
+		{"//line foo:-12\n", "invalid line number: -12", "", 0, 11},
+		{"//line C:foo:0\n", "invalid line number: 0", "", 0, 13},
+		{fmt.Sprintf("//line foo:%d\n", lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), "", 0, 11},
 
-		{"//line " + runtime.GOROOT() + "/src/a/a.go:123\n   foo", "syntax error: package statement must be first", "$GOROOT/src/a/a.go", 123 - linebase, 3},
+		// invalid //line directives with two colons
+		{"//line ::\n", "invalid line number: ", "", 0, 9},
+		{"//line ::x\n", "invalid line number: x", "", 0, 9},
+		{"//line foo::123abc\n", "invalid line number: 123abc", "", 0, 12},
+		{"//line foo::0\n", "invalid line number: 0", "", 0, 12},
+		{"//line foo:0:1\n", "invalid line number: 0", "", 0, 11},
+
+		{"//line :123:0\n", "invalid column number: 0", "", 0, 12},
+		{"//line foo:123:0\n", "invalid column number: 0", "", 0, 15},
+
+		// effect of valid //line directives on positions
+		{"//line foo:123\n   foo", valid, "foo", 123 - linebase, 3},
+		{"//line  foo:123\n   foo", valid, " foo", 123 - linebase, 3},
+		{"//line foo:123\n//line bar:345\nfoo", valid, "bar", 345 - linebase, 0},
+		{"//line C:foo:123\n", valid, "C:foo", 123 - linebase, 0},
+		{"//line " + runtime.GOROOT() + "/src/a/a.go:123\n   foo", valid, "$GOROOT/src/a/a.go", 123 - linebase, 3},
+		{"//line :x:1\n", valid, ":x", 0, 0},
+		{"//line foo ::1\n", valid, "foo :", 0, 0},
+		{"//line foo:123abc:1\n", valid, "foo:123abc", 0, 0},
+		{"//line foo :123:1\n", valid, "foo ", 123 - linebase, 0},
+		{"//line ::123\n", valid, ":", 123 - linebase, 0},
+
+		// TODO(gri) add tests to verify correct column changes, once implemented
+
+		// ignored /*line directives
+		{"/**/", valid, "", 1 - linebase, 4},             // no directive
+		{"/*line*/", valid, "", 1 - linebase, 8},         // missing colon
+		{"/*line foo*/", valid, "", 1 - linebase, 12},    // missing colon
+		{"  //line foo:*/", valid, "", 1 - linebase, 15}, // not a line start
+		{"/*  line foo:*/", valid, "", 1 - linebase, 15}, // space between // and line
+
+		// invalid /*line directives with one colon
+		{"/*line :*/", "invalid line number: ", "", 0, 8},
+		{"/*line :x*/", "invalid line number: x", "", 0, 8},
+		{"/*line foo :*/", "invalid line number: ", "", 0, 12},
+		{"/*line foo:x*/", "invalid line number: x", "", 0, 11},
+		{"/*line foo:0*/", "invalid line number: 0", "", 0, 11},
+		{"/*line foo:1 */", "invalid line number: 1 ", "", 0, 11},
+		{"/*line C:foo:0*/", "invalid line number: 0", "", 0, 13},
+		{fmt.Sprintf("/*line foo:%d*/", lineMax+1), fmt.Sprintf("invalid line number: %d", lineMax+1), "", 0, 11},
+
+		// invalid /*line directives with two colons
+		{"/*line ::*/", "invalid line number: ", "", 0, 9},
+		{"/*line ::x*/", "invalid line number: x", "", 0, 9},
+		{"/*line foo::123abc*/", "invalid line number: 123abc", "", 0, 12},
+		{"/*line foo::0*/", "invalid line number: 0", "", 0, 12},
+		{"/*line foo:0:1*/", "invalid line number: 0", "", 0, 11},
+
+		{"/*line :123:0*/", "invalid column number: 0", "", 0, 12},
+		{"/*line foo:123:0*/", "invalid column number: 0", "", 0, 15},
+
+		// effect of valid /*line directives on positions
+		// TODO(gri) remove \n after directives once line number is computed correctly
+		{"/*line foo:123*/\n   foo", valid, "foo", 123 - linebase, 3},
+		{"/*line foo:123*/\n//line bar:345\nfoo", valid, "bar", 345 - linebase, 0},
+		{"/*line C:foo:123*/\n", valid, "C:foo", 123 - linebase, 0},
+		{"/*line " + runtime.GOROOT() + "/src/a/a.go:123*/\n   foo", valid, "$GOROOT/src/a/a.go", 123 - linebase, 3},
+		{"/*line :x:1*/\n", valid, ":x", 1 - linebase, 0},
+		{"/*line foo ::1*/\n", valid, "foo :", 1 - linebase, 0},
+		{"/*line foo:123abc:1*/\n", valid, "foo:123abc", 1 - linebase, 0},
+		{"/*line foo :123:1*/\n", valid, "foo ", 123 - linebase, 0},
+		{"/*line ::123*/\n", valid, ":", 123 - linebase, 0},
+
+		// test effect of /*line directive on (relative) position information for this line
+		// TODO(gri) add these tests
+
+		// TODO(gri) add tests to verify correct column changes, once implemented
 	} {
 		fileh := func(name string) string {
 			if strings.HasPrefix(name, runtime.GOROOT()) {
@@ -224,11 +298,11 @@
 		if filename := perr.Pos.AbsFilename(); filename != test.filename {
 			t.Errorf("%s: got filename = %q; want %q", test.src, filename, test.filename)
 		}
-		if line := perr.Pos.RelLine(); line != test.line+linebase {
-			t.Errorf("%s: got line = %d; want %d", test.src, line, test.line+linebase)
+		if line := perr.Pos.RelLine(); line-linebase != test.line {
+			t.Errorf("%s: got line = %d; want %d", test.src, line-linebase, test.line)
 		}
-		if col := perr.Pos.Col(); col != test.col+colbase {
-			t.Errorf("%s: got col = %d; want %d", test.src, col, test.col+colbase)
+		if col := perr.Pos.Col(); col-colbase != test.col {
+			t.Errorf("%s: got col = %d; want %d", test.src, col-colbase, test.col)
 		}
 	}
 }
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go
index 05391e5..be406d9 100644
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -577,6 +577,7 @@
 
 func (s *scanner) lineComment() {
 	r := s.getr()
+
 	// directives must start at the beginning of the line (s.col == colbase)
 	if s.col != colbase || s.pragh == nil || (r != 'g' && r != 'l') {
 		s.skipLine(r)
@@ -608,20 +609,47 @@
 	s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after //
 }
 
-func (s *scanner) fullComment() {
-	for {
-		r := s.getr()
+func (s *scanner) skipComment(r rune) {
+	for r >= 0 {
 		for r == '*' {
 			r = s.getr()
 			if r == '/' {
 				return
 			}
 		}
-		if r < 0 {
-			s.errh(s.line, s.col, "comment not terminated")
+		r = s.getr()
+	}
+	s.errh(s.line, s.col, "comment not terminated")
+}
+
+func (s *scanner) fullComment() {
+	r := s.getr()
+
+	if s.pragh == nil || r != 'l' {
+		s.skipComment(r)
+		return
+	}
+	// s.pragh != nil && r == 'l'
+
+	// recognize line directive
+	const prefix = "line "
+	for _, m := range prefix {
+		if r != m {
+			s.skipComment(r)
 			return
 		}
+		r = s.getr()
 	}
+
+	// directive text without comment ending
+	s.startLit()
+	s.skipComment(r)
+	text := s.stopLit()
+	if i := len(text) - 2; i >= 0 && text[i] == '*' && text[i+1] == '/' {
+		text = text[:i]
+	}
+
+	s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after /*
 }
 
 func (s *scanner) escape(quote rune) bool {
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go
index ba4ba8f..160bcbe 100644
--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@@ -45,10 +45,10 @@
 	// make source
 	var buf bytes.Buffer
 	for i, s := range sampleTokens {
-		buf.WriteString("\t\t\t\t"[:i&3])     // leading indentation
-		buf.WriteString(s.src)                // token
-		buf.WriteString("        "[:i&7])     // trailing spaces
-		buf.WriteString("/* foo */ // bar\n") // comments
+		buf.WriteString("\t\t\t\t"[:i&3])           // leading indentation
+		buf.WriteString(s.src)                      // token
+		buf.WriteString("        "[:i&7])           // trailing spaces
+		buf.WriteString("/*line foo:1 */ // bar\n") // comments (don't crash w/o directive handler)
 	}
 
 	// scan source
@@ -314,7 +314,6 @@
 		{"`foo", "string not terminated", 0, 0},
 		{"/*/", "comment not terminated", 0, 0},
 		{"/*\n\nfoo", "comment not terminated", 0, 0},
-		{"/*\n\nfoo", "comment not terminated", 0, 0},
 		{`"\`, "string not terminated", 0, 0},
 		{`"\"`, "string not terminated", 0, 0},
 		{`"\x`, "string not terminated", 0, 0},