go/scanner: don't eat \r in comments if that shortens the comment

For consistent formatting across platforms we strip \r characters
from comments. This happens in go/scanner, which already does the
same for raw string literals, where it is mandated by the spec. But
if a \r (or a sequence of \r's) appears in a general (/*-style)
comment between a * and a /, removing it shortens that text segment
to */, which terminates the comment prematurely.

Don't do it.
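
For illustration, here is a minimal sketch of the failure mode (the
concrete source text and program below are illustrative, not part of
this change): a /*-style comment containing *\r/ survives scanning,
but stripping the \r used to leave a */ that closed the comment
early, so the reprinted program no longer parsed.

	package main

	import (
		"bytes"
		"fmt"
		"go/parser"
		"go/printer"
		"go/token"
	)

	func main() {
		// The \r sits between a * and a /, so the scanner does not
		// see a closing */ there; the comment runs to the final */.
		const src = "package p /**\r/ still inside the comment */\n"

		fset := token.NewFileSet()
		f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
		if err != nil {
			panic(err)
		}

		var buf bytes.Buffer
		printer.Fprint(&buf, fset, f)
		fmt.Printf("%q\n", buf.String())

		// Before this change the comment came back starting with /**/,
		// terminating it early, and this reparse reported an error.
		if _, err := parser.ParseFile(fset, "", buf.String(), 0); err != nil {
			fmt.Println("reparse failed:", err)
		}
	}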

As an aside, a better approach would be to not touch comments (and
raw string literals, for that matter) in the scanner at all and leave
the extra processing to clients. That is the approach taken by the
cmd/compile/internal/syntax package. However, we probably can't
change the semantics here too much, so just make the minimal change
that doesn't produce invalid comments. It's an esoteric case, after
all.

Fixes #11151.

Change-Id: Ib4dcb52094f13c235e840c9672e439ea65fef961
Reviewed-on: https://go-review.googlesource.com/87498
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
diff --git a/src/go/printer/printer_test.go b/src/go/printer/printer_test.go
index 5984d2c..e06604a 100644
--- a/src/go/printer/printer_test.go
+++ b/src/go/printer/printer_test.go
@@ -710,3 +710,26 @@
 		t.Errorf("got %q, want %q", buf.String(), bar)
 	}
 }
+
+func TestIssue11151(t *testing.T) {
+	const src = "package p\t/*\r/1\r*\r/2*\r\r\r\r/3*\r\r+\r\r/4*/\n"
+	fset := token.NewFileSet()
+	f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	var buf bytes.Buffer
+	Fprint(&buf, fset, f)
+	got := buf.String()
+	const want = "package p\t/*/1*\r/2*\r/3*+/4*/\n" // \r following opening /* should be stripped
+	if got != want {
+		t.Errorf("\ngot : %q\nwant: %q", got, want)
+	}
+
+	// the resulting program must be valid
+	_, err = parser.ParseFile(fset, "", got, 0)
+	if err != nil {
+		t.Errorf("%v\norig: %q\ngot : %q", err, src, got)
+	}
+}
diff --git a/src/go/scanner/scanner.go b/src/go/scanner/scanner.go
index a86e4eb..5e0a4a4 100644
--- a/src/go/scanner/scanner.go
+++ b/src/go/scanner/scanner.go
@@ -204,7 +204,7 @@
 exit:
 	lit := s.src[offs:s.offset]
 	if hasCR {
-		lit = stripCR(lit)
+		lit = stripCR(lit, lit[1] == '*')
 	}
 
 	return string(lit)
@@ -480,11 +480,16 @@
 	return string(s.src[offs:s.offset])
 }
 
-func stripCR(b []byte) []byte {
+func stripCR(b []byte, comment bool) []byte {
 	c := make([]byte, len(b))
 	i := 0
-	for _, ch := range b {
-		if ch != '\r' {
+	for j, ch := range b {
+		// In a /*-style comment, don't strip \r from *\r/ (incl.
+		// sequences of \r from *\r\r...\r/) since the resulting
+		// */ would terminate the comment too early unless the \r
+		// is immediately following the opening /* in which case
+		// it's ok because /*/ is not closed yet (issue #11151).
+		if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' {
 			c[i] = ch
 			i++
 		}
@@ -514,7 +519,7 @@
 
 	lit := s.src[offs:s.offset]
 	if hasCR {
-		lit = stripCR(lit)
+		lit = stripCR(lit, false)
 	}
 
 	return string(lit)
diff --git a/src/go/scanner/scanner_test.go b/src/go/scanner/scanner_test.go
index ff41c03..f70d932 100644
--- a/src/go/scanner/scanner_test.go
+++ b/src/go/scanner/scanner_test.go
@@ -45,6 +45,8 @@
 	{token.COMMENT, "/* a comment */", special},
 	{token.COMMENT, "// a comment \n", special},
 	{token.COMMENT, "/*\r*/", special},
+	{token.COMMENT, "/**\r/*/", special}, // issue 11151
+	{token.COMMENT, "/**\r\r/*/", special},
 	{token.COMMENT, "//\r\n", special},
 
 	// Identifiers and basic type literals
@@ -270,7 +272,7 @@
 		switch e.tok {
 		case token.COMMENT:
 			// no CRs in comments
-			elit = string(stripCR([]byte(e.lit)))
+			elit = string(stripCR([]byte(e.lit), e.lit[1] == '*'))
 			//-style comment literal doesn't contain newline
 			if elit[1] == '/' {
 				elit = elit[0 : len(elit)-1]
@@ -284,7 +286,7 @@
 				// no CRs in raw string literals
 				elit = e.lit
 				if elit[0] == '`' {
-					elit = string(stripCR([]byte(elit)))
+					elit = string(stripCR([]byte(elit), false))
 				}
 			} else if e.tok.IsKeyword() {
 				elit = e.lit
@@ -309,6 +311,26 @@
 	}
 }
 
+func TestStripCR(t *testing.T) {
+	for _, test := range []struct{ have, want string }{
+		{"//\n", "//\n"},
+		{"//\r\n", "//\n"},
+		{"//\r\r\r\n", "//\n"},
+		{"//\r*\r/\r\n", "//*/\n"},
+		{"/**/", "/**/"},
+		{"/*\r/*/", "/*/*/"},
+		{"/*\r*/", "/**/"},
+		{"/**\r/*/", "/**\r/*/"},
+		{"/*\r/\r*\r/*/", "/*/*\r/*/"},
+		{"/*\r\r\r\r*/", "/**/"},
+	} {
+		got := string(stripCR([]byte(test.have), len(test.have) >= 2 && test.have[1] == '*'))
+		if got != test.want {
+			t.Errorf("stripCR(%q) = %q; want %q", test.have, got, test.want)
+		}
+	}
+}
+
 func checkSemi(t *testing.T, line string, mode Mode) {
 	var S Scanner
 	file := fset.AddFile("TestSemis", fset.Base(), len(line))