modfile: remove trailing newline from comment tokens

In v0.2.0, the go.mod lexer removed trailing LF bytes from comment
tokens. This regressed in v0.3.0. Documentation on Comment.Token says
the trailing newline should not be included.

This CL fixes the lexer to strip trailing newlines again. It will now
strip both LF and CRLF newlines. It also includes a test to ensure
comments are attached at the right place in the syntax tree with the
right content.

Fixes golang/go#39913

Change-Id: I7fba0ed3c85f0a3c23fefc6b7fecfe6df7777aea
Reviewed-on: https://go-review.googlesource.com/c/mod/+/240557
Run-TryBot: Jay Conrod <jayconrod@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Michael Matloob <matloob@golang.org>
diff --git a/modfile/read.go b/modfile/read.go
index c1f2008..2a961ca 100644
--- a/modfile/read.go
+++ b/modfile/read.go
@@ -477,9 +477,17 @@
 
 // endToken marks the end of an input token.
 // It records the actual token string in tok.text.
+// For comment tokens, one trailing newline (LF or CRLF) is stripped from that text.
 func (in *input) endToken(kind tokenKind) {
 	in.token.kind = kind
 	text := string(in.tokenStart[:len(in.tokenStart)-len(in.remaining)])
+	if kind.isComment() {
+		if strings.HasSuffix(text, "\r\n") {
+			text = text[:len(text)-2] // drop the two-byte CRLF terminator
+		} else {
+			text = strings.TrimSuffix(text, "\n") // no-op when text has no trailing LF
+		}
+	}
 	in.token.text = text
 	in.token.endPos = in.pos
 }
diff --git a/modfile/read_test.go b/modfile/read_test.go
index f64d319..7065e91 100644
--- a/modfile/read_test.go
+++ b/modfile/read_test.go
@@ -445,3 +445,120 @@
 		})
 	}
 }
+
+func TestComments(t *testing.T) {
+	for _, test := range []struct {
+		desc, input, want string // want holds one line per comment: owner, position, quoted token
+	}{
+		{
+			desc: "comment_only",
+			input: `
+// a
+// b
+`,
+			want: `
+comments before "// a"
+comments before "// b"
+`,
+		}, {
+			desc: "line",
+			input: `
+// a
+
+// b
+module m // c
+// d
+
+// e
+`,
+			want: `
+comments before "// a"
+line before "// b"
+line suffix "// c"
+comments before "// d"
+comments before "// e"
+`,
+		}, {
+			desc: "block",
+			input: `
+// a
+
+// b
+block ( // c
+	// d
+
+	// e
+	x // f
+	// g
+
+	// h
+) // i
+// j
+
+// k
+`,
+			want: `
+comments before "// a"
+block before "// b"
+lparen suffix "// c"
+blockline before "// d"
+blockline before ""
+blockline before "// e"
+blockline suffix "// f"
+rparen before "// g"
+rparen before ""
+rparen before "// h"
+rparen suffix "// i"
+comments before "// j"
+comments before "// k"
+`,
+		}, {
+			desc:  "cr_removed",
+			input: "// a\r\r\n", // only the final CRLF is expected to be stripped; the first CR stays in the token
+			want:  `comments before "// a\r"`,
+		},
+	} {
+		t.Run(test.desc, func(t *testing.T) {
+			f, err := ParseLax("go.mod", []byte(test.input), nil)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			buf := &bytes.Buffer{}
+			printComments := func(prefix string, cs *Comments) { // writes one line to buf per comment in cs, tagged with prefix and position
+				for _, c := range cs.Before {
+					fmt.Fprintf(buf, "%s before %q\n", prefix, c.Token)
+				}
+				for _, c := range cs.Suffix {
+					fmt.Fprintf(buf, "%s suffix %q\n", prefix, c.Token)
+				}
+				for _, c := range cs.After {
+					fmt.Fprintf(buf, "%s after %q\n", prefix, c.Token)
+				}
+			}
+
+			printComments("file", &f.Syntax.Comments) // comments stored on f.Syntax itself come first
+			for _, stmt := range f.Syntax.Stmt {
+				switch stmt := stmt.(type) {
+				case *CommentBlock:
+					printComments("comments", stmt.Comment())
+				case *Line:
+					printComments("line", stmt.Comment())
+				case *LineBlock:
+					printComments("block", stmt.Comment())
+					printComments("lparen", stmt.LParen.Comment())
+					for _, line := range stmt.Line {
+						printComments("blockline", line.Comment())
+					}
+					printComments("rparen", stmt.RParen.Comment())
+				}
+			}
+
+			got := strings.TrimSpace(buf.String()) // surrounding whitespace is trimmed from both got and want before comparing
+			want := strings.TrimSpace(test.want)
+			if got != want {
+				t.Errorf("got:\n%s\nwant:\n%s", got, want)
+			}
+		})
+	}
+}