html: properly handle exclamation marks in comments
Properly handle the case where HTML comments begin with exclamation
marks and have no other content, i.e. "<!--!-->". Previously these
comments would cause the tokenizer to treat everything following them
as part of the comment.
Fixes golang/go#37771
Change-Id: I78ea310debc3846f145d62cba017055abc7fa4e0
Reviewed-on: https://go-review.googlesource.com/c/net/+/442496
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/html/token.go b/html/token.go
index be3c754..ae24a6f 100644
--- a/html/token.go
+++ b/html/token.go
@@ -605,7 +605,10 @@
z.data.end = z.data.start
}
}()
- for dashCount := 2; ; {
+
+ var dashCount int
+ beginning := true
+ for {
c := z.readByte()
if z.err != nil {
// Ignore up to two dashes at EOF.
@@ -620,7 +623,7 @@
dashCount++
continue
case '>':
- if dashCount >= 2 {
+ if dashCount >= 2 || beginning {
z.data.end = z.raw.end - len("-->")
return
}
@@ -638,6 +641,7 @@
}
}
dashCount = 0
+ beginning = false
}
}
diff --git a/html/token_test.go b/html/token_test.go
index ee33caf..0b9a947 100644
--- a/html/token_test.go
+++ b/html/token_test.go
@@ -366,6 +366,16 @@
"a<!--x--!>z",
"a$<!--x-->$z",
},
+ {
+ "comment14",
+ "a<!--!-->z",
+ "a$<!--!-->$z",
+ },
+ {
+ "comment15",
+ "a<!-- !-->z",
+ "a$<!-- !-->$z",
+ },
// An attribute with a backslash.
{
"backslash",
@@ -456,26 +466,27 @@
}
func TestTokenizer(t *testing.T) {
-loop:
for _, tt := range tokenTests {
- z := NewTokenizer(strings.NewReader(tt.html))
- if tt.golden != "" {
- for i, s := range strings.Split(tt.golden, "$") {
- if z.Next() == ErrorToken {
- t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
- continue loop
- }
- actual := z.Token().String()
- if s != actual {
- t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
- continue loop
+ t.Run(tt.desc, func(t *testing.T) {
+ z := NewTokenizer(strings.NewReader(tt.html))
+ if tt.golden != "" {
+ for i, s := range strings.Split(tt.golden, "$") {
+ if z.Next() == ErrorToken {
+ t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
+ return
+ }
+ actual := z.Token().String()
+ if s != actual {
+ t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
+ return
+ }
}
}
- }
- z.Next()
- if z.Err() != io.EOF {
- t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
- }
+ z.Next()
+ if z.Err() != io.EOF {
+ t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
+ }
+ })
}
}