| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build !go1.10 |
| |
| package precis |
| |
| import ( |
| "strings" |
| |
| "golang.org/x/text/runes" |
| "golang.org/x/text/secure/bidirule" |
| ) |
| |
| var enforceTestCases = []struct { |
| name string |
| p *Profile |
| cases []testCase |
| }{ |
| {"Basic", NewFreeform(), []testCase{ |
| {"e\u0301\u031f", "\u00e9\u031f", nil}, // normalize |
| }}, |
| |
| {"Context Rule 1", NewFreeform(), []testCase{ |
| // Rule 1: zero-width non-joiner (U+200C) |
| // From RFC: |
| // False |
| // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; |
| // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C |
| // (Joining_Type:T)*(Joining_Type:{R,D})) Then True; |
| // |
| // Example runes for different joining types: |
| // Join L: U+A872; PHAGS-PA SUPERFIXED LETTER RA |
| // Join D: U+062C; HAH WITH DOT BELOW |
| // Join T: U+0610; ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM |
| // Join R: U+0627; ALEF |
| // Virama: U+0A4D; GURMUKHI SIGN VIRAMA |
| // Virama and Join T: U+0ACD; GUJARATI SIGN VIRAMA |
| {"\u200c", "", errContext}, |
| {"\u200ca", "", errContext}, |
| {"a\u200c", "", errContext}, |
| {"\u200c\u0627", "", errContext}, // missing JoinStart |
| {"\u062c\u200c", "", errContext}, // missing JoinEnd |
| {"\u0610\u200c\u0610\u0627", "", errContext}, // missing JoinStart |
| {"\u062c\u0610\u200c\u0610", "", errContext}, // missing JoinEnd |
| |
| // Variants of: D T* U+200c T* R |
| {"\u062c\u200c\u0627", "\u062c\u200c\u0627", nil}, |
| {"\u062c\u0610\u200c\u0610\u0627", "\u062c\u0610\u200c\u0610\u0627", nil}, |
| {"\u062c\u0610\u0610\u200c\u0610\u0610\u0627", "\u062c\u0610\u0610\u200c\u0610\u0610\u0627", nil}, |
| {"\u062c\u0610\u200c\u0627", "\u062c\u0610\u200c\u0627", nil}, |
| {"\u062c\u200c\u0610\u0627", "\u062c\u200c\u0610\u0627", nil}, |
| |
| // Variants of: L T* U+200c T* D |
| {"\ua872\u200c\u062c", "\ua872\u200c\u062c", nil}, |
| {"\ua872\u0610\u200c\u0610\u062c", "\ua872\u0610\u200c\u0610\u062c", nil}, |
| {"\ua872\u0610\u0610\u200c\u0610\u0610\u062c", "\ua872\u0610\u0610\u200c\u0610\u0610\u062c", nil}, |
| {"\ua872\u0610\u200c\u062c", "\ua872\u0610\u200c\u062c", nil}, |
| {"\ua872\u200c\u0610\u062c", "\ua872\u200c\u0610\u062c", nil}, |
| |
| // Virama |
| {"\u0a4d\u200c", "\u0a4d\u200c", nil}, |
| {"\ua872\u0a4d\u200c", "\ua872\u0a4d\u200c", nil}, |
| {"\ua872\u0a4d\u0610\u200c", "", errContext}, |
| {"\ua872\u0a4d\u0610\u200c", "", errContext}, |
| |
| {"\u0acd\u200c", "\u0acd\u200c", nil}, |
| {"\ua872\u0acd\u200c", "\ua872\u0acd\u200c", nil}, |
| {"\ua872\u0acd\u0610\u200c", "", errContext}, |
| {"\ua872\u0acd\u0610\u200c", "", errContext}, |
| |
| // Using Virama as join T |
| {"\ua872\u0acd\u200c\u062c", "\ua872\u0acd\u200c\u062c", nil}, |
| {"\ua872\u200c\u0acd\u062c", "\ua872\u200c\u0acd\u062c", nil}, |
| }}, |
| |
| {"Context Rule 2", NewFreeform(), []testCase{ |
| // Rule 2: zero-width joiner (U+200D) |
| {"\u200d", "", errContext}, |
| {"\u200da", "", errContext}, |
| {"a\u200d", "", errContext}, |
| |
| {"\u0a4d\u200d", "\u0a4d\u200d", nil}, |
| {"\ua872\u0a4d\u200d", "\ua872\u0a4d\u200d", nil}, |
| {"\u0a4da\u200d", "", errContext}, |
| }}, |
| |
| {"Context Rule 3", NewFreeform(), []testCase{ |
| // Rule 3: middle dot |
| {"·", "", errContext}, |
| {"l·", "", errContext}, |
| {"·l", "", errContext}, |
| {"a·", "", errContext}, |
| {"l·a", "", errContext}, |
| {"a·a", "", errContext}, |
| {"l·l", "l·l", nil}, |
| {"al·la", "al·la", nil}, |
| }}, |
| |
| {"Context Rule 4", NewFreeform(), []testCase{ |
| // Rule 4: Greek lower numeral U+0375 |
| {"͵", "", errContext}, |
| {"͵a", "", errContext}, |
| {"α͵", "", errContext}, |
| {"͵α", "͵α", nil}, |
| {"α͵α", "α͵α", nil}, |
| {"͵͵α", "͵͵α", nil}, // The numeric sign is itself Greek. |
| {"α͵͵α", "α͵͵α", nil}, |
| {"α͵͵", "", errContext}, |
| {"α͵͵a", "", errContext}, |
| }}, |
| |
| {"Context Rule 5+6", NewFreeform(), []testCase{ |
| // Rule 5+6: Hebrew preceding |
| // U+05f3: Geresh |
| {"׳", "", errContext}, |
| {"׳ה", "", errContext}, |
| {"a׳b", "", errContext}, |
| {"ש׳", "ש׳", nil}, // U+05e9 U+05f3 |
| {"ש׳׳׳", "ש׳׳׳", nil}, // U+05e9 U+05f3 |
| |
| // U+05f4: Gershayim |
| {"״", "", errContext}, |
| {"״ה", "", errContext}, |
| {"a״b", "", errContext}, |
| {"ש״", "ש״", nil}, // U+05e9 U+05f4 |
| {"ש״״״", "ש״״״", nil}, // U+05e9 U+05f4 |
| {"aש״״״", "aש״״״", nil}, // U+05e9 U+05f4 |
| }}, |
| |
| {"Context Rule 7", NewFreeform(), []testCase{ |
| // Rule 7: Katakana middle Dot |
| {"・", "", errContext}, |
| {"abc・", "", errContext}, |
| {"・def", "", errContext}, |
| {"abc・def", "", errContext}, |
| {"aヅc・def", "aヅc・def", nil}, |
| {"abc・dぶf", "abc・dぶf", nil}, |
| {"⺐bc・def", "⺐bc・def", nil}, |
| }}, |
| |
| {"Context Rule 8+9", NewFreeform(), []testCase{ |
| // Rule 8+9: Arabic Indic Digit |
| {"١٢٣٤٥۶", "", errContext}, |
| {"۱۲۳۴۵٦", "", errContext}, |
| {"١٢٣٤٥", "١٢٣٤٥", nil}, |
| {"۱۲۳۴۵", "۱۲۳۴۵", nil}, |
| }}, |
| |
| {"Nickname", Nickname, []testCase{ |
| {" Swan of Avon ", "Swan of Avon", nil}, |
| {"", "", errEmptyString}, |
| {" ", "", errEmptyString}, |
| {" ", "", errEmptyString}, |
| {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil}, |
| {"Foo", "Foo", nil}, |
| {"foo", "foo", nil}, |
| {"Foo Bar", "Foo Bar", nil}, |
| {"foo bar", "foo bar", nil}, |
| {"\u03A3", "\u03A3", nil}, |
| {"\u03C3", "\u03C3", nil}, |
| // Greek final sigma is left as is (do not fold!) |
| {"\u03C2", "\u03C2", nil}, |
| {"\u265A", "♚", nil}, |
| {"Richard \u2163", "Richard IV", nil}, |
| {"\u212B", "Å", nil}, |
| {"\uFB00", "ff", nil}, // because of NFKC |
| {"שa", "שa", nil}, // no bidi rule |
| {"동일조건변경허락", "동일조건변경허락", nil}, |
| }}, |
| {"OpaqueString", OpaqueString, []testCase{ |
| {" Swan of Avon ", " Swan of Avon ", nil}, |
| {"", "", errEmptyString}, |
| {" ", " ", nil}, |
| {" ", " ", nil}, |
| {"a\u00A0a\u1680a\u2000a\u2001a\u2002a\u2003a\u2004a\u2005a\u2006a\u2007a\u2008a\u2009a\u200Aa\u202Fa\u205Fa\u3000a", "a a a a a a a a a a a a a a a a a", nil}, |
| {"Foo", "Foo", nil}, |
| {"foo", "foo", nil}, |
| {"Foo Bar", "Foo Bar", nil}, |
| {"foo bar", "foo bar", nil}, |
| {"\u03C3", "\u03C3", nil}, |
| {"Richard \u2163", "Richard \u2163", nil}, |
| {"\u212B", "Å", nil}, |
| {"Jack of \u2666s", "Jack of \u2666s", nil}, |
| {"my cat is a \u0009by", "", errDisallowedRune}, |
| {"שa", "שa", nil}, // no bidi rule |
| }}, |
| {"UsernameCaseMapped", UsernameCaseMapped, []testCase{ |
| // TODO: Should this work? |
| // {UsernameCaseMapped, "", "", errDisallowedRune}, |
| {"juliet@example.com", "juliet@example.com", nil}, |
| {"fussball", "fussball", nil}, |
| {"fu\u00DFball", "fu\u00DFball", nil}, |
| {"\u03C0", "\u03C0", nil}, |
| {"\u03A3", "\u03C3", nil}, |
| {"\u03C3", "\u03C3", nil}, |
| // Greek final sigma is left as is (do not fold!) |
| {"\u03C2", "\u03C2", nil}, |
| {"\u0049", "\u0069", nil}, |
| {"\u0049", "\u0069", nil}, |
| {"\u03D2", "", errDisallowedRune}, |
| {"\u03B0", "\u03B0", nil}, |
| {"foo bar", "", errDisallowedRune}, |
| {"♚", "", errDisallowedRune}, |
| {"\u007E", "~", nil}, |
| {"a", "a", nil}, |
| {"!", "!", nil}, |
| {"²", "", errDisallowedRune}, |
| {"\t", "", errDisallowedRune}, |
| {"\n", "", errDisallowedRune}, |
| {"\u26D6", "", errDisallowedRune}, |
| {"\u26FF", "", errDisallowedRune}, |
| {"\uFB00", "", errDisallowedRune}, |
| {"\u1680", "", errDisallowedRune}, |
| {" ", "", errDisallowedRune}, |
| {" ", "", errDisallowedRune}, |
| {"\u01C5", "", errDisallowedRune}, |
| {"\u16EE", "", errDisallowedRune}, // Nl RUNIC ARLAUG SYMBOL |
| {"\u0488", "", errDisallowedRune}, // Me COMBINING CYRILLIC HUNDRED THOUSANDS SIGN |
| {"\u212B", "\u00e5", nil}, // Angstrom sign, NFC -> U+00E5 |
| {"A\u030A", "å", nil}, // A + ring |
| {"\u00C5", "å", nil}, // A with ring |
| {"\u00E7", "ç", nil}, // c cedille |
| {"\u0063\u0327", "ç", nil}, // c + cedille |
| {"\u0158", "ř", nil}, |
| {"\u0052\u030C", "ř", nil}, |
| |
| {"\u1E61", "\u1E61", nil}, // LATIN SMALL LETTER S WITH DOT ABOVE |
| |
| // Confusable characters ARE allowed and should NOT be mapped. |
| {"\u0410", "\u0430", nil}, // CYRILLIC CAPITAL LETTER A |
| |
| // Full width should be mapped to the canonical decomposition. |
| {"AB", "ab", nil}, |
| {"שc", "", bidirule.ErrInvalid}, // bidi rule |
| |
| }}, |
| {"UsernameCasePreserved", UsernameCasePreserved, []testCase{ |
| {"ABC", "ABC", nil}, |
| {"AB", "AB", nil}, |
| {"שc", "", bidirule.ErrInvalid}, // bidi rule |
| {"\uFB00", "", errDisallowedRune}, |
| {"\u212B", "\u00c5", nil}, // Angstrom sign, NFC -> U+00E5 |
| {"ẛ", "", errDisallowedRune}, // LATIN SMALL LETTER LONG S WITH DOT ABOVE |
| }}, |
| {"UsernameCaseMappedRestricted", NewRestrictedProfile(UsernameCaseMapped, runes.Predicate(func(r rune) bool { |
| return strings.ContainsRune(`@`, r) |
| })), []testCase{ |
| {"juliet@example.com", "", errDisallowedRune}, |
| {"\u0049", "\u0069", nil}, |
| }}, |
| } |