internal/export/idna: Allow specifying CheckHyphens and CheckJoiners
This aligns with the options in the latest version of UTS 46, and in
particular allows implementing the WHATWG URL Standard.
Fixes golang/go#41732.
Change-Id: Iab577eff4303f3eea64512d07d968c891acf126f
Reviewed-on: https://go-review.googlesource.com/c/text/+/258837
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
Run-TryBot: Marcel van Lohuizen <mpvl@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Nigel Tao <nigeltao@golang.org>
diff --git a/internal/export/idna/idna10.0.0.go b/internal/export/idna/idna10.0.0.go
index 1244f9c..2ceb327 100644
--- a/internal/export/idna/idna10.0.0.go
+++ b/internal/export/idna/idna10.0.0.go
@@ -65,15 +65,14 @@
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
+//
+// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify }
}
// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
-//
-// This is the behavior suggested by the UTS #46 and is adopted by some
-// browsers.
func RemoveLeadingDots(remove bool) Option {
return func(o *options) { o.removeLeadingDots = remove }
}
@@ -81,6 +80,8 @@
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
+// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners
+// flags of UTS #46 to the value of enable.
func ValidateLabels(enable bool) Option {
return func(o *options) {
// Don't override existing mappings, but set one that at least checks
@@ -89,25 +90,48 @@
o.mapping = normalize
}
o.trie = trie
- o.validateLabels = enable
- o.fromPuny = validateFromPunycode
+ o.checkJoiners = enable
+ o.checkHyphens = enable
+ if enable {
+ o.fromPuny = validateFromPunycode
+ } else {
+ o.fromPuny = nil
+ }
+ }
+}
+
+// CheckHyphens sets whether to check for correct use of hyphens ('-') in
+// labels. Most web browsers do not have this option set, since labels such as
+// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
+//
+// This option corresponds to the CheckHyphens flag in UTS #46.
+func CheckHyphens(enable bool) Option {
+ return func(o *options) { o.checkHyphens = enable }
+}
+
+// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
+// A of RFC 5892, concerning the use of joiner runes.
+//
+// This option corresponds to the CheckJoiners flag in UTS #46.
+func CheckJoiners(enable bool) Option {
+ return func(o *options) {
+ o.trie = trie
+ o.checkJoiners = enable
}
}
// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
-// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
+// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
+// but is only useful if ValidateLabels is set.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
-// http://www.rfc-editor.org/std/std3.txt for more details This option
-// corresponds to the UseSTD3ASCIIRules option in UTS #46.
+// http://www.rfc-editor.org/std/std3.txt for more details.
+//
+// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
- return func(o *options) {
- o.trie = trie
- o.useSTD3Rules = use
- o.fromPuny = validateFromPunycode
- }
+ return func(o *options) { o.useSTD3Rules = use }
}
// NOTE: the following options pull in tables. The tables should not be linked
@@ -115,6 +139,8 @@
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
+//
+// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
return func(o *options) { o.bidirule = bidirule.ValidString }
}
@@ -150,7 +176,8 @@
type options struct {
transitional bool
useSTD3Rules bool
- validateLabels bool
+ checkHyphens bool
+ checkJoiners bool
verifyDNSLength bool
removeLeadingDots bool
@@ -223,8 +250,11 @@
if p.useSTD3Rules {
s += ":UseSTD3Rules"
}
- if p.validateLabels {
- s += ":ValidateLabels"
+ if p.checkHyphens {
+ s += ":CheckHyphens"
+ }
+ if p.checkJoiners {
+ s += ":CheckJoiners"
}
if p.verifyDNSLength {
s += ":VerifyDNSLength"
@@ -252,26 +282,29 @@
punycode = &Profile{}
lookup = &Profile{options{
- transitional: true,
- useSTD3Rules: true,
- validateLabels: true,
- trie: trie,
- fromPuny: validateFromPunycode,
- mapping: validateAndMap,
- bidirule: bidirule.ValidString,
+ transitional: true,
+ useSTD3Rules: true,
+ checkHyphens: true,
+ checkJoiners: true,
+ trie: trie,
+ fromPuny: validateFromPunycode,
+ mapping: validateAndMap,
+ bidirule: bidirule.ValidString,
}}
display = &Profile{options{
- useSTD3Rules: true,
- validateLabels: true,
- trie: trie,
- fromPuny: validateFromPunycode,
- mapping: validateAndMap,
- bidirule: bidirule.ValidString,
+ useSTD3Rules: true,
+ checkHyphens: true,
+ checkJoiners: true,
+ trie: trie,
+ fromPuny: validateFromPunycode,
+ mapping: validateAndMap,
+ bidirule: bidirule.ValidString,
}}
registration = &Profile{options{
useSTD3Rules: true,
- validateLabels: true,
verifyDNSLength: true,
+ checkHyphens: true,
+ checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
@@ -338,7 +371,7 @@
}
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
labels.set(u)
- if err == nil && p.validateLabels {
+ if err == nil && p.fromPuny != nil {
err = p.fromPuny(p, u)
}
if err == nil {
@@ -679,16 +712,18 @@
}
return nil
}
- if !p.validateLabels {
+ if p.checkHyphens {
+ if len(s) > 4 && s[2] == '-' && s[3] == '-' {
+ return &labelError{s, "V2"}
+ }
+ if s[0] == '-' || s[len(s)-1] == '-' {
+ return &labelError{s, "V3"}
+ }
+ }
+ if !p.checkJoiners {
return nil
}
- trie := p.trie // p.validateLabels is only set if trie is set.
- if len(s) > 4 && s[2] == '-' && s[3] == '-' {
- return &labelError{s, "V2"}
- }
- if s[0] == '-' || s[len(s)-1] == '-' {
- return &labelError{s, "V3"}
- }
+ trie := p.trie // p.checkJoiners is only set if trie is set.
// TODO: merge the use of this in the trie.
v, sz := trie.lookupString(s)
x := info(v)
diff --git a/internal/export/idna/idna10.0.0_test.go b/internal/export/idna/idna10.0.0_test.go
index ed01f93..66ea636 100644
--- a/internal/export/idna/idna10.0.0_test.go
+++ b/internal/export/idna/idna10.0.0_test.go
@@ -31,6 +31,8 @@
lengthA := kind{"CheckLengthA", p.ToASCII}
p = New(MapForLookup(), StrictDomainName(false))
std3 := kind{"STD3", p.ToASCII}
+ p = New(MapForLookup(), CheckHyphens(false))
+ hyphens := kind{"CheckHyphens", p.ToASCII}
testCases := []struct {
kind
@@ -85,6 +87,12 @@
{display, "*.foo.com", "*.foo.com", "P1"},
{std3, "*.foo.com", "*.foo.com", ""},
+ // Hyphens
+ {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
+ {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
+ {display, "-label-.com", "-label-.com", "V3"},
+ {hyphens, "-label-.com", "-label-.com", ""},
+
// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
// lab9.be.
diff --git a/internal/export/idna/idna9.0.0.go b/internal/export/idna/idna9.0.0.go
index 25f2ac3..1ea9431 100644
--- a/internal/export/idna/idna9.0.0.go
+++ b/internal/export/idna/idna9.0.0.go
@@ -64,15 +64,14 @@
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
+//
+// This option corresponds to the VerifyDnsLength flag in UTS #46.
func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify }
}
// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
-//
-// This is the behavior suggested by the UTS #46 and is adopted by some
-// browsers.
func RemoveLeadingDots(remove bool) Option {
return func(o *options) { o.removeLeadingDots = remove }
}
@@ -80,6 +79,8 @@
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
+// In particular, ValidateLabels also sets the CheckHyphens and CheckJoiners
+// flags of UTS #46 to the value of enable.
func ValidateLabels(enable bool) Option {
return func(o *options) {
// Don't override existing mappings, but set one that at least checks
@@ -88,25 +89,48 @@
o.mapping = normalize
}
o.trie = trie
- o.validateLabels = enable
- o.fromPuny = validateFromPunycode
+ o.checkJoiners = enable
+ o.checkHyphens = enable
+ if enable {
+ o.fromPuny = validateFromPunycode
+ } else {
+ o.fromPuny = nil
+ }
+ }
+}
+
+// CheckHyphens sets whether to check for correct use of hyphens ('-') in
+// labels. Most web browsers do not have this option set, since labels such as
+// "r3---sn-apo3qvuoxuxbt-j5pe" are in common use.
+//
+// This option corresponds to the CheckHyphens flag in UTS #46.
+func CheckHyphens(enable bool) Option {
+ return func(o *options) { o.checkHyphens = enable }
+}
+
+// CheckJoiners sets whether to check the ContextJ rules as defined in Appendix
+// A of RFC 5892, concerning the use of joiner runes.
+//
+// This option corresponds to the CheckJoiners flag in UTS #46.
+func CheckJoiners(enable bool) Option {
+ return func(o *options) {
+ o.trie = trie
+ o.checkJoiners = enable
}
}
// StrictDomainName limits the set of permissable ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
-// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
+// hyphen). This is set by default for MapForLookup and ValidateForRegistration,
+// but is only useful if ValidateLabels is set.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
-// http://www.rfc-editor.org/std/std3.txt for more details This option
-// corresponds to the UseSTD3ASCIIRules option in UTS #46.
+// http://www.rfc-editor.org/std/std3.txt for more details.
+//
+// This option corresponds to the UseSTD3ASCIIRules flag in UTS #46.
func StrictDomainName(use bool) Option {
- return func(o *options) {
- o.trie = trie
- o.useSTD3Rules = use
- o.fromPuny = validateFromPunycode
- }
+ return func(o *options) { o.useSTD3Rules = use }
}
// NOTE: the following options pull in tables. The tables should not be linked
@@ -114,6 +138,8 @@
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
+//
+// This option corresponds to the CheckBidi flag in UTS #46.
func BidiRule() Option {
return func(o *options) { o.bidirule = bidirule.ValidString }
}
@@ -150,7 +176,8 @@
type options struct {
transitional bool
useSTD3Rules bool
- validateLabels bool
+ checkHyphens bool
+ checkJoiners bool
verifyDNSLength bool
removeLeadingDots bool
@@ -223,8 +250,11 @@
if p.useSTD3Rules {
s += ":UseSTD3Rules"
}
- if p.validateLabels {
- s += ":ValidateLabels"
+ if p.checkHyphens {
+ s += ":CheckHyphens"
+ }
+ if p.checkJoiners {
+ s += ":CheckJoiners"
}
if p.verifyDNSLength {
s += ":VerifyDNSLength"
@@ -253,9 +283,10 @@
punycode = &Profile{}
lookup = &Profile{options{
transitional: true,
- useSTD3Rules: true,
- validateLabels: true,
removeLeadingDots: true,
+ useSTD3Rules: true,
+ checkHyphens: true,
+ checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
@@ -263,8 +294,9 @@
}}
display = &Profile{options{
useSTD3Rules: true,
- validateLabels: true,
removeLeadingDots: true,
+ checkHyphens: true,
+ checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
@@ -272,8 +304,9 @@
}}
registration = &Profile{options{
useSTD3Rules: true,
- validateLabels: true,
verifyDNSLength: true,
+ checkHyphens: true,
+ checkJoiners: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
@@ -337,7 +370,7 @@
continue
}
labels.set(u)
- if err == nil && p.validateLabels {
+ if err == nil && p.fromPuny != nil {
err = p.fromPuny(p, u)
}
if err == nil {
@@ -627,16 +660,18 @@
if p.bidirule != nil && !p.bidirule(s) {
return &labelError{s, "B"}
}
- if !p.validateLabels {
+ if p.checkHyphens {
+ if len(s) > 4 && s[2] == '-' && s[3] == '-' {
+ return &labelError{s, "V2"}
+ }
+ if s[0] == '-' || s[len(s)-1] == '-' {
+ return &labelError{s, "V3"}
+ }
+ }
+ if !p.checkJoiners {
return nil
}
- trie := p.trie // p.validateLabels is only set if trie is set.
- if len(s) > 4 && s[2] == '-' && s[3] == '-' {
- return &labelError{s, "V2"}
- }
- if s[0] == '-' || s[len(s)-1] == '-' {
- return &labelError{s, "V3"}
- }
+ trie := p.trie // p.checkJoiners is only set if trie is set.
// TODO: merge the use of this in the trie.
v, sz := trie.lookupString(s)
x := info(v)
diff --git a/internal/export/idna/idna9.0.0_test.go b/internal/export/idna/idna9.0.0_test.go
index 7047d74..03b1267 100644
--- a/internal/export/idna/idna9.0.0_test.go
+++ b/internal/export/idna/idna9.0.0_test.go
@@ -31,6 +31,8 @@
lengthA := kind{"CheckLengthA", p.ToASCII}
p = New(MapForLookup(), StrictDomainName(false))
std3 := kind{"STD3", p.ToASCII}
+ p = New(MapForLookup(), CheckHyphens(false))
+ hyphens := kind{"CheckHyphens", p.ToASCII}
testCases := []struct {
kind
@@ -81,6 +83,12 @@
{display, "*.foo.com", "*.foo.com", "P1"},
{std3, "*.foo.com", "*.foo.com", ""},
+ // Hyphens
+ {display, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "V2"},
+ {hyphens, "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", "r3---sn-apo3qvuoxuxbt-j5pe.googlevideo.com", ""},
+ {display, "-label-.com", "-label-.com", "V3"},
+ {hyphens, "-label-.com", "-label-.com", ""},
+
// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
// lab9.be.