// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
| 5 | package html |
| 6 | |
| 7 | import "testing" |
| 8 | |
// unescapeTest is a single table entry consumed by TestUnescape.
// Entries are written as positional literals, so field order matters.
type unescapeTest struct {
	desc      string // A short description of the test case.
	html      string // The HTML text.
	unescaped string // The unescaped text.
}
| 17 | |
| 18 | var unescapeTests = []unescapeTest{ |
| 19 | // Handle no entities. |
| 20 | { |
| 21 | "copy", |
| 22 | "A\ttext\nstring", |
| 23 | "A\ttext\nstring", |
| 24 | }, |
| 25 | // Handle simple named entities. |
| 26 | { |
| 27 | "simple", |
| 28 | "& > <", |
| 29 | "& > <", |
| 30 | }, |
| 31 | // Handle hitting the end of the string. |
| 32 | { |
| 33 | "stringEnd", |
| 34 | "& &", |
| 35 | "& &", |
| 36 | }, |
| 37 | // Handle entities with two codepoints. |
| 38 | { |
| 39 | "multiCodepoint", |
| 40 | "text ⋛︀ blah", |
| 41 | "text \u22db\ufe00 blah", |
| 42 | }, |
| 43 | // Handle decimal numeric entities. |
| 44 | { |
| 45 | "decimalEntity", |
| 46 | "Delta = Δ ", |
| 47 | "Delta = Δ ", |
| 48 | }, |
| 49 | // Handle hexadecimal numeric entities. |
| 50 | { |
| 51 | "hexadecimalEntity", |
| 52 | "Lambda = λ = λ ", |
| 53 | "Lambda = λ = λ ", |
| 54 | }, |
| 55 | // Handle numeric early termination. |
| 56 | { |
| 57 | "numericEnds", |
| 58 | "&# &#x €43 © = ©f = ©", |
| 59 | "&# &#x €43 © = ©f = ©", |
| 60 | }, |
| 61 | // Handle numeric ISO-8859-1 entity replacements. |
| 62 | { |
| 63 | "numericReplacements", |
| 64 | "Footnote‡", |
| 65 | "Footnote‡", |
| 66 | }, |
| 67 | } |
| 68 | |
| 69 | func TestUnescape(t *testing.T) { |
| 70 | for _, tt := range unescapeTests { |
| 71 | unescaped := UnescapeString(tt.html) |
| 72 | if unescaped != tt.unescaped { |
| 73 | t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped) |
| 74 | } |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | func TestUnescapeEscape(t *testing.T) { |
| 79 | ss := []string{ |
| 80 | ``, |
| 81 | `abc def`, |
| 82 | `a & b`, |
| 83 | `a&b`, |
| 84 | `a & b`, |
| 85 | `"`, |
| 86 | `"`, |
| 87 | `"<&>"`, |
| 88 | `"<&>"`, |
| 89 | `3&5==1 && 0<1, "0<1", a+acute=á`, |
| 90 | `The special characters are: <, >, &, ' and "`, |
| 91 | } |
| 92 | for _, s := range ss { |
| 93 | if got := UnescapeString(EscapeString(s)); got != s { |
| 94 | t.Errorf("got %q want %q", got, s) |
| 95 | } |
| 96 | } |
| 97 | } |