unicode: add the first few property tests for printing.
The long-term goal is that %q will use IsPrint to decide
what to show natively vs. as hexadecimal.
R=rsc, r
CC=golang-dev
https://golang.org/cl/4526095
diff --git a/src/pkg/unicode/Makefile b/src/pkg/unicode/Makefile
index 26e6e50..55ed5b2 100644
--- a/src/pkg/unicode/Makefile
+++ b/src/pkg/unicode/Makefile
@@ -8,6 +8,7 @@
GOFILES=\
casetables.go\
digit.go\
+ graphic.go\
letter.go\
tables.go\
diff --git a/src/pkg/unicode/digit.go b/src/pkg/unicode/digit.go
index 471c4df..c0866ea 100644
--- a/src/pkg/unicode/digit.go
+++ b/src/pkg/unicode/digit.go
@@ -6,7 +6,7 @@
// IsDigit reports whether the rune is a decimal digit.
func IsDigit(rune int) bool {
- if rune < 0x100 { // quick ASCII (Latin-1, really) check
+ if rune < Latin1Max {
return '0' <= rune && rune <= '9'
}
return Is(Digit, rune)
diff --git a/src/pkg/unicode/digit_test.go b/src/pkg/unicode/digit_test.go
index 9bbccde..3cafbb1 100644
--- a/src/pkg/unicode/digit_test.go
+++ b/src/pkg/unicode/digit_test.go
@@ -118,7 +118,7 @@
// Test that the special case in IsDigit agrees with the table
func TestDigitOptimization(t *testing.T) {
- for i := 0; i < 0x100; i++ {
+ for i := 0; i < Latin1Max; i++ {
if Is(Digit, i) != IsDigit(i) {
t.Errorf("IsDigit(U+%04X) disagrees with Is(Digit)", i)
}
diff --git a/src/pkg/unicode/graphic.go b/src/pkg/unicode/graphic.go
new file mode 100644
index 0000000..3feda7b
--- /dev/null
+++ b/src/pkg/unicode/graphic.go
@@ -0,0 +1,132 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode
+
+// Bit masks for each code point under U+0100, for fast lookup.
+const (
+ pC = 1 << iota // a control character.
+ pP // a punctuation character.
+ pN // a numeral.
+ pS // a symbolic character.
+ pZ // a spacing character.
+ pLu // an upper-case letter.
+ pLl // a lower-case letter.
+ pp // a printable character according to Go's definition.
+ pg = pp | pZ // a graphical character according to the Unicode definition.
+)
+
+// GraphicRanges defines the set of graphic characters according to Unicode.
+var GraphicRanges = []*RangeTable{
+ L, M, N, P, S, Zs,
+}
+
+// PrintRanges defines the set of printable characters according to Go.
+// ASCII space, U+0020, is handled separately.
+var PrintRanges = []*RangeTable{
+ L, M, N, P, S,
+}
+
+// IsGraphic reports whether the rune is defined as a Graphic by Unicode.
+// Such characters include letters, marks, numbers, punctuation, symbols, and
+// spaces, from categories L, M, N, P, S, Zs.
+func IsGraphic(rune int) bool {
+ // We cast to uint32 to avoid the extra test for negative,
+ // and in the index we cast to uint8 to avoid the range check.
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pg != 0
+ }
+ return IsOneOf(GraphicRanges, rune)
+}
+
+// IsPrint reports whether the rune is defined as printable by Go. Such
+// characters include letters, marks, numbers, punctuation, symbols, and the
+// ASCII space character, from categories L, M, N, P, S and the ASCII space
+// character. This categorization is the same as IsGraphic except that the
+// only spacing character is ASCII space, U+0020.
+func IsPrint(rune int) bool {
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pp != 0
+ }
+ return IsOneOf(PrintRanges, rune)
+}
+
+// IsOneOf reports whether the rune is a member of one of the ranges.
+// The rune is known to be above Latin-1.
+func IsOneOf(set []*RangeTable, rune int) bool {
+ for _, inside := range set {
+ if Is(inside, rune) {
+ return true
+ }
+ }
+ return false
+}
+
+// IsControl reports whether the rune is a control character.
+// The C (Other) Unicode category includes more code points
+// such as surrogates; use Is(C, rune) to test for them.
+func IsControl(rune int) bool {
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pC != 0
+ }
+ // All control characters are < Latin1Max.
+ return false
+}
+
+// IsLetter reports whether the rune is a letter (category L).
+func IsLetter(rune int) bool {
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&(pLu|pLl) != 0
+ }
+ return Is(Letter, rune)
+}
+
+// IsMark reports whether the rune is a mark character (category M).
+func IsMark(rune int) bool {
+ // There are no mark characters in Latin-1.
+ return Is(Mark, rune)
+}
+
+// IsNumber reports whether the rune is a number (category N).
+func IsNumber(rune int) bool {
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pN != 0
+ }
+ return Is(Number, rune)
+}
+
+// IsPunct reports whether the rune is a Unicode punctuation character
+// (category P).
+func IsPunct(rune int) bool {
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pP != 0
+ }
+ return Is(Punct, rune)
+}
+
+// IsSpace reports whether the rune is a space character as defined
+// by Unicode's White Space property; in the Latin-1 space
+// this is
+// '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP).
+// Other definitions of spacing characters are set by category
+// Z and property Pattern_White_Space.
+func IsSpace(rune int) bool {
+ // This property isn't the same as Z; special-case it.
+ if uint32(rune) < Latin1Max {
+ switch rune {
+ case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
+ return true
+ }
+ return false
+ }
+ return Is(White_Space, rune)
+}
+
+// IsSymbol reports whether the rune is a symbolic character.
+func IsSymbol(rune int) bool {
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pS != 0
+ }
+ return Is(Symbol, rune)
+}
diff --git a/src/pkg/unicode/graphic_test.go b/src/pkg/unicode/graphic_test.go
new file mode 100644
index 0000000..b15b974
--- /dev/null
+++ b/src/pkg/unicode/graphic_test.go
@@ -0,0 +1,122 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package unicode_test
+
+import (
+ "testing"
+ . "unicode"
+)
+
+// Independently check that the special "Is" functions work
+// in the Latin-1 range through the property table.
+
+func TestIsControlLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsControl(i)
+ want := false
+ switch {
+ case 0x00 <= i && i <= 0x1F:
+ want = true
+ case 0x7F <= i && i <= 0x9F:
+ want = true
+ }
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsLetterLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsLetter(i)
+ want := Is(Letter, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsUpperLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsUpper(i)
+ want := Is(Upper, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsLowerLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsLower(i)
+ want := Is(Lower, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestNumberLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsNumber(i)
+ want := Is(Number, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsPrintLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsPrint(i)
+ want := IsOneOf(PrintRanges, i)
+ if i == ' ' {
+ want = true
+ }
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsGraphicLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsGraphic(i)
+ want := IsOneOf(GraphicRanges, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsPunctLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsPunct(i)
+ want := Is(Punct, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsSpaceLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsSpace(i)
+ want := Is(White_Space, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
+
+func TestIsSymbolLatin1(t *testing.T) {
+ for i := 0; i < Latin1Max; i++ {
+ got := IsSymbol(i)
+ want := Is(Symbol, i)
+ if got != want {
+ t.Errorf("%U incorrect: got %t; want %t", i, got, want)
+ }
+ }
+}
diff --git a/src/pkg/unicode/letter.go b/src/pkg/unicode/letter.go
index 047bef1..54df777 100644
--- a/src/pkg/unicode/letter.go
+++ b/src/pkg/unicode/letter.go
@@ -9,6 +9,8 @@
const (
MaxRune = 0x10FFFF // Maximum valid Unicode code point.
ReplacementChar = 0xFFFD // Represents invalid code points.
+ ASCIIMax = 0x80 // (1 beyond) maximum ASCII value.
+ Latin1Max = 0x100 // (1 beyond) maximum Latin-1 value.
)
// RangeTable defines a set of Unicode code points by listing the ranges of
@@ -121,7 +123,7 @@
// Is tests whether rune is in the specified table of ranges.
func Is(rangeTab *RangeTable, rune int) bool {
// common case: rune is ASCII or Latin-1.
- if rune < 0x100 {
+ if uint32(rune) < Latin1Max {
// Only need to check R16, since R32 is always >= 1<<16.
r16 := uint16(rune)
for _, r := range rangeTab.R16 {
@@ -148,49 +150,30 @@
// IsUpper reports whether the rune is an upper case letter.
func IsUpper(rune int) bool {
- if rune < 0x80 { // quick ASCII check
- return 'A' <= rune && rune <= 'Z'
+ // See comment in IsGraphic.
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pLu != 0
}
return Is(Upper, rune)
}
// IsLower reports whether the rune is a lower case letter.
func IsLower(rune int) bool {
- if rune < 0x80 { // quick ASCII check
- return 'a' <= rune && rune <= 'z'
+ // See comment in IsGraphic.
+ if uint32(rune) < Latin1Max {
+ return properties[uint8(rune)]&pLl != 0
}
return Is(Lower, rune)
}
// IsTitle reports whether the rune is a title case letter.
func IsTitle(rune int) bool {
- if rune < 0x80 { // quick ASCII check
+ if rune < Latin1Max {
return false
}
return Is(Title, rune)
}
-// IsLetter reports whether the rune is a letter.
-func IsLetter(rune int) bool {
- if rune < 0x80 { // quick ASCII check
- rune &^= 'a' - 'A'
- return 'A' <= rune && rune <= 'Z'
- }
- return Is(Letter, rune)
-}
-
-// IsSpace reports whether the rune is a white space character.
-func IsSpace(rune int) bool {
- if rune <= 0xFF { // quick Latin-1 check
- switch rune {
- case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0:
- return true
- }
- return false
- }
- return Is(White_Space, rune)
-}
-
// to maps the rune using the specified case mapping.
func to(_case int, rune int, caseRange []CaseRange) int {
if _case < 0 || MaxCase <= _case {
@@ -235,7 +218,7 @@
// ToUpper maps the rune to upper case.
func ToUpper(rune int) int {
- if rune < 0x80 { // quick ASCII check
+ if rune < ASCIIMax {
if 'a' <= rune && rune <= 'z' {
rune -= 'a' - 'A'
}
@@ -246,7 +229,7 @@
// ToLower maps the rune to lower case.
func ToLower(rune int) int {
- if rune < 0x80 { // quick ASCII check
+ if rune < ASCIIMax {
if 'A' <= rune && rune <= 'Z' {
rune += 'a' - 'A'
}
@@ -257,7 +240,7 @@
// ToTitle maps the rune to title case.
func ToTitle(rune int) int {
- if rune < 0x80 { // quick ASCII check
+ if rune < ASCIIMax {
if 'a' <= rune && rune <= 'z' { // title case is upper case for ASCII
rune -= 'a' - 'A'
}
diff --git a/src/pkg/unicode/letter_test.go b/src/pkg/unicode/letter_test.go
index 432ffb6..989f9cf 100644
--- a/src/pkg/unicode/letter_test.go
+++ b/src/pkg/unicode/letter_test.go
@@ -323,7 +323,7 @@
// Check that the optimizations for IsLetter etc. agree with the tables.
// We only need to check the Latin-1 range.
func TestLetterOptimizations(t *testing.T) {
- for i := 0; i < 0x100; i++ {
+ for i := 0; i < Latin1Max; i++ {
if Is(Letter, i) != IsLetter(i) {
t.Errorf("IsLetter(U+%04X) disagrees with Is(Letter)", i)
}
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
index c3cf32b..dd6da41 100644
--- a/src/pkg/unicode/maketables.go
+++ b/src/pkg/unicode/maketables.go
@@ -28,6 +28,7 @@
printScriptOrProperty(false)
printScriptOrProperty(true)
printCases()
+ printLatinProperties()
printSizes()
}
@@ -54,7 +55,17 @@
var scriptRe = regexp.MustCompile(`^([0-9A-F]+)(\.\.[0-9A-F]+)? *; ([A-Za-z_]+)$`)
var logger = log.New(os.Stderr, "", log.Lshortfile)
-var category = map[string]bool{"letter": true} // Nd Lu etc. letter is a special case
+var category = map[string]bool{
+ // Nd Lu etc.
+ // We use one-character names to identify merged categories
+ "L": true, // Lu Ll Lt Lm Lo
+ "P": true, // Pc Pd Ps Pe Pu Pf Po
+ "M": true, // Mn Mc Me
+ "N": true, // Nd Nl No
+ "S": true, // Sm Sc Sk So
+ "Z": true, // Zs Zl Zp
+ "C": true, // Cc Cf Cs Co Cn
+}
// UnicodeData.txt has form:
// 0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
@@ -247,12 +258,9 @@
return "Unknown"
}
-func letterOp(code int) bool {
- switch chars[code].category {
- case "Lu", "Ll", "Lt", "Lm", "Lo":
- return true
- }
- return false
+func categoryOp(code int, class uint8) bool {
+ category := chars[code].category
+ return len(category) > 0 && category[0] == class
}
func loadChars() {
@@ -348,8 +356,27 @@
// Cases deserving special comments
varDecl := ""
switch name {
- case "letter":
- varDecl = "\tLetter = letter; // Letter is the set of Unicode letters.\n"
+ case "C":
+ varDecl = "\tOther = _C; // Other/C is the set of Unicode control and special characters, category C.\n"
+ varDecl += "\tC = _C\n"
+ case "L":
+ varDecl = "\tLetter = _L; // Letter/L is the set of Unicode letters, category L.\n"
+ varDecl += "\tL = _L\n"
+ case "M":
+ varDecl = "\tMark = _M; // Mark/M is the set of Unicode mark characters, category M.\n"
+ varDecl += "\tM = _M\n"
+ case "N":
+ varDecl = "\tNumber = _N; // Number/N is the set of Unicode number characters, category N.\n"
+ varDecl += "\tN = _N\n"
+ case "P":
+ varDecl = "\tPunct = _P; // Punct/P is the set of Unicode punctuation characters, category P.\n"
+ varDecl += "\tP = _P\n"
+ case "S":
+ varDecl = "\tSymbol = _S; // Symbol/S is the set of Unicode symbol characters, category S.\n"
+ varDecl += "\tS = _S\n"
+ case "Z":
+ varDecl = "\tSpace = _Z; // Space/Z is the set of Unicode space characters, category Z.\n"
+ varDecl += "\tZ = _Z\n"
case "Nd":
varDecl = "\tDigit = _Nd; // Digit is the set of Unicode characters with the \"decimal digit\" property.\n"
case "Lu":
@@ -359,17 +386,18 @@
case "Lt":
varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n"
}
- if name != "letter" {
+ if len(name) > 1 {
varDecl += fmt.Sprintf(
"\t%s = _%s; // %s is the set of Unicode characters in category %s.\n",
name, name, name, name)
}
decl[ndecl] = varDecl
ndecl++
- if name == "letter" { // special case
+ if len(name) == 1 { // unified categories
+ decl := fmt.Sprintf("var _%s = &RangeTable{\n", name)
dumpRange(
- "var letter = &RangeTable{\n",
- letterOp)
+ decl,
+ func(code int) bool { return categoryOp(code, name[0]) })
continue
}
dumpRange(
@@ -446,7 +474,7 @@
if size == 16 && hi >= 1<<16 {
if lo < 1<<16 {
if lo+stride != hi {
- log.Fatalf("unexpected straddle: %U %U %d", lo, hi, stride)
+ logger.Fatalf("unexpected straddle: %U %U %d", lo, hi, stride)
}
// No range contains U+FFFF as an instance, so split
// the range into two entries. That way we can maintain
@@ -472,11 +500,11 @@
logger.Fatal("unknown category", name)
}
r, ok := unicode.Categories[name]
- if !ok {
- logger.Fatal("unknown table", name)
+ if !ok && len(name) > 1 {
+ logger.Fatalf("unknown table %q", name)
}
- if name == "letter" {
- verifyRange(name, letterOp, r)
+ if len(name) == 1 {
+ verifyRange(name, func(code int) bool { return categoryOp(code, name[0]) }, r)
} else {
verifyRange(
name,
@@ -487,11 +515,16 @@
}
func verifyRange(name string, inCategory Op, table *unicode.RangeTable) {
+ count := 0
for i := range chars {
web := inCategory(i)
pkg := unicode.Is(table, i)
if web != pkg {
fmt.Fprintf(os.Stderr, "%s: %U: web=%t pkg=%t\n", name, i, web, pkg)
+ count++
+ if count > 10 {
+ break
+ }
}
}
}
@@ -882,6 +915,42 @@
}
}
+func printLatinProperties() {
+ if *test {
+ return
+ }
+ fmt.Println("var properties = [Latin1Max]uint8{")
+ for code := 0; code < unicode.Latin1Max; code++ {
+ var property string
+ switch chars[code].category {
+ case "Cc", "": // NUL has no category.
+ property = "pC"
+ case "Cf": // soft hyphen, unique category, not printable.
+ property = "0"
+ case "Ll":
+ property = "pLl | pp"
+ case "Lu":
+ property = "pLu | pp"
+ case "Nd", "No":
+ property = "pN | pp"
+ case "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps":
+ property = "pP | pp"
+ case "Sc", "Sk", "Sm", "So":
+ property = "pS | pp"
+ case "Zs":
+ property = "pZ"
+ default:
+ logger.Fatalf("%U has unknown category %q", code, chars[code].category)
+ }
+ // Special case
+ if code == ' ' {
+ property = "pZ | pp"
+ }
+ fmt.Printf("\t0x%.2X: %s, // %q\n", code, property, code)
+ }
+ fmt.Println("}")
+}
+
var range16Count = 0 // Number of entries in the 16-bit range tables.
var range32Count = 0 // Number of entries in the 32-bit range tables.
diff --git a/src/pkg/unicode/script_test.go b/src/pkg/unicode/script_test.go
index ff452b7..b37ad18 100644
--- a/src/pkg/unicode/script_test.go
+++ b/src/pkg/unicode/script_test.go
@@ -149,7 +149,14 @@
{0x2028, "Zl"},
{0x2029, "Zp"},
{0x202f, "Zs"},
- {0x04aa, "letter"},
+ // Unifieds.
+ {0x04aa, "L"},
+ {0x0009, "C"},
+ {0x1712, "M"},
+ {0x0031, "N"},
+ {0x00bb, "P"},
+ {0x00a2, "S"},
+ {0x00a0, "Z"},
}
var inPropTest = []T{
@@ -197,13 +204,13 @@
t.Fatal(test.script, "not a known script")
}
if !Is(Scripts[test.script], test.rune) {
- t.Errorf("IsScript(%#x, %s) = false, want true", test.rune, test.script)
+ t.Errorf("IsScript(%U, %s) = false, want true", test.rune, test.script)
}
notTested[test.script] = false, false
}
for _, test := range outTest {
if Is(Scripts[test.script], test.rune) {
- t.Errorf("IsScript(%#x, %s) = true, want false", test.rune, test.script)
+ t.Errorf("IsScript(%U, %s) = true, want false", test.rune, test.script)
}
}
for k := range notTested {
@@ -221,7 +228,7 @@
t.Fatal(test.script, "not a known category")
}
if !Is(Categories[test.script], test.rune) {
- t.Errorf("IsCategory(%#x, %s) = false, want true", test.rune, test.script)
+ t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
}
notTested[test.script] = false, false
}
@@ -240,7 +247,7 @@
t.Fatal(test.script, "not a known prop")
}
if !Is(Properties[test.script], test.rune) {
- t.Errorf("IsCategory(%#x, %s) = false, want true", test.rune, test.script)
+ t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
}
notTested[test.script] = false, false
}
diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go
index fc2bdd8..87c734c 100644
--- a/src/pkg/unicode/tables.go
+++ b/src/pkg/unicode/tables.go
@@ -9,36 +9,42 @@
// Categories is the set of Unicode data tables.
var Categories = map[string]*RangeTable{
- "Lm": Lm,
- "Ll": Ll,
- "Me": Me,
- "Mc": Mc,
- "Mn": Mn,
- "Zl": Zl,
- "letter": letter,
- "Zp": Zp,
- "Zs": Zs,
- "Cs": Cs,
- "Co": Co,
- "Cf": Cf,
- "Cc": Cc,
- "Po": Po,
- "Pi": Pi,
- "Pf": Pf,
- "Pe": Pe,
- "Pd": Pd,
- "Pc": Pc,
- "Ps": Ps,
- "Nd": Nd,
- "Nl": Nl,
- "No": No,
- "So": So,
- "Sm": Sm,
- "Sk": Sk,
- "Sc": Sc,
- "Lu": Lu,
- "Lt": Lt,
- "Lo": Lo,
+ "Lm": Lm,
+ "Ll": Ll,
+ "C": C,
+ "M": M,
+ "L": L,
+ "N": N,
+ "P": P,
+ "S": S,
+ "Z": Z,
+ "Me": Me,
+ "Mc": Mc,
+ "Mn": Mn,
+ "Zl": Zl,
+ "Zp": Zp,
+ "Zs": Zs,
+ "Cs": Cs,
+ "Co": Co,
+ "Cf": Cf,
+ "Cc": Cc,
+ "Po": Po,
+ "Pi": Pi,
+ "Pf": Pf,
+ "Pe": Pe,
+ "Pd": Pd,
+ "Pc": Pc,
+ "Ps": Ps,
+ "Nd": Nd,
+ "Nl": Nl,
+ "No": No,
+ "So": So,
+ "Sm": Sm,
+ "Sk": Sk,
+ "Sc": Sc,
+ "Lu": Lu,
+ "Lt": Lt,
+ "Lo": Lo,
}
var _Lm = &RangeTable{
@@ -226,127 +232,36 @@
},
}
-var _Me = &RangeTable{
+var _C = &RangeTable{
R16: []Range16{
- {0x0488, 0x0489, 1},
- {0x20dd, 0x20e0, 1},
- {0x20e2, 0x20e4, 1},
- {0xa670, 0xa672, 1},
- },
-}
-
-var _Mc = &RangeTable{
- R16: []Range16{
- {0x0903, 0x093b, 56},
- {0x093e, 0x0940, 1},
- {0x0949, 0x094c, 1},
- {0x094e, 0x094f, 1},
- {0x0982, 0x0983, 1},
- {0x09be, 0x09c0, 1},
- {0x09c7, 0x09c8, 1},
- {0x09cb, 0x09cc, 1},
- {0x09d7, 0x0a03, 44},
- {0x0a3e, 0x0a40, 1},
- {0x0a83, 0x0abe, 59},
- {0x0abf, 0x0ac0, 1},
- {0x0ac9, 0x0acb, 2},
- {0x0acc, 0x0b02, 54},
- {0x0b03, 0x0b3e, 59},
- {0x0b40, 0x0b47, 7},
- {0x0b48, 0x0b4b, 3},
- {0x0b4c, 0x0b57, 11},
- {0x0bbe, 0x0bbf, 1},
- {0x0bc1, 0x0bc2, 1},
- {0x0bc6, 0x0bc8, 1},
- {0x0bca, 0x0bcc, 1},
- {0x0bd7, 0x0c01, 42},
- {0x0c02, 0x0c03, 1},
- {0x0c41, 0x0c44, 1},
- {0x0c82, 0x0c83, 1},
- {0x0cbe, 0x0cc0, 2},
- {0x0cc1, 0x0cc4, 1},
- {0x0cc7, 0x0cc8, 1},
- {0x0cca, 0x0ccb, 1},
- {0x0cd5, 0x0cd6, 1},
- {0x0d02, 0x0d03, 1},
- {0x0d3e, 0x0d40, 1},
- {0x0d46, 0x0d48, 1},
- {0x0d4a, 0x0d4c, 1},
- {0x0d57, 0x0d82, 43},
- {0x0d83, 0x0dcf, 76},
- {0x0dd0, 0x0dd1, 1},
- {0x0dd8, 0x0ddf, 1},
- {0x0df2, 0x0df3, 1},
- {0x0f3e, 0x0f3f, 1},
- {0x0f7f, 0x102b, 172},
- {0x102c, 0x1031, 5},
- {0x1038, 0x103b, 3},
- {0x103c, 0x1056, 26},
- {0x1057, 0x1062, 11},
- {0x1063, 0x1064, 1},
- {0x1067, 0x106d, 1},
- {0x1083, 0x1084, 1},
- {0x1087, 0x108c, 1},
- {0x108f, 0x109a, 11},
- {0x109b, 0x109c, 1},
- {0x17b6, 0x17be, 8},
- {0x17bf, 0x17c5, 1},
- {0x17c7, 0x17c8, 1},
- {0x1923, 0x1926, 1},
- {0x1929, 0x192b, 1},
- {0x1930, 0x1931, 1},
- {0x1933, 0x1938, 1},
- {0x19b0, 0x19c0, 1},
- {0x19c8, 0x19c9, 1},
- {0x1a19, 0x1a1b, 1},
- {0x1a55, 0x1a57, 2},
- {0x1a61, 0x1a63, 2},
- {0x1a64, 0x1a6d, 9},
- {0x1a6e, 0x1a72, 1},
- {0x1b04, 0x1b35, 49},
- {0x1b3b, 0x1b3d, 2},
- {0x1b3e, 0x1b41, 1},
- {0x1b43, 0x1b44, 1},
- {0x1b82, 0x1ba1, 31},
- {0x1ba6, 0x1ba7, 1},
- {0x1baa, 0x1be7, 61},
- {0x1bea, 0x1bec, 1},
- {0x1bee, 0x1bf2, 4},
- {0x1bf3, 0x1c24, 49},
- {0x1c25, 0x1c2b, 1},
- {0x1c34, 0x1c35, 1},
- {0x1ce1, 0x1cf2, 17},
- {0xa823, 0xa824, 1},
- {0xa827, 0xa880, 89},
- {0xa881, 0xa8b4, 51},
- {0xa8b5, 0xa8c3, 1},
- {0xa952, 0xa953, 1},
- {0xa983, 0xa9b4, 49},
- {0xa9b5, 0xa9ba, 5},
- {0xa9bb, 0xa9bd, 2},
- {0xa9be, 0xa9c0, 1},
- {0xaa2f, 0xaa30, 1},
- {0xaa33, 0xaa34, 1},
- {0xaa4d, 0xaa7b, 46},
- {0xabe3, 0xabe4, 1},
- {0xabe6, 0xabe7, 1},
- {0xabe9, 0xabea, 1},
- {0xabec, 0xabec, 1},
+ {0x0001, 0x001f, 1},
+ {0x007f, 0x009f, 1},
+ {0x00ad, 0x0600, 1363},
+ {0x0601, 0x0603, 1},
+ {0x06dd, 0x070f, 50},
+ {0x17b4, 0x17b5, 1},
+ {0x200b, 0x200f, 1},
+ {0x202a, 0x202e, 1},
+ {0x2060, 0x2064, 1},
+ {0x206a, 0x206f, 1},
+ {0xd800, 0xf8ff, 1},
+ {0xfeff, 0xfff9, 250},
+ {0xfffa, 0xfffb, 1},
},
R32: []Range32{
- {0x11000, 0x11000, 1},
- {0x11002, 0x11082, 128},
- {0x110b0, 0x110b2, 1},
- {0x110b7, 0x110b8, 1},
- {0x1d165, 0x1d166, 1},
- {0x1d16d, 0x1d172, 1},
+ {0x110bd, 0x1d173, 49334},
+ {0x1d174, 0x1d17a, 1},
+ {0xe0001, 0xe0020, 31},
+ {0xe0021, 0xe007f, 1},
+ {0xf0000, 0xffffd, 1},
+ {0x100000, 0x10fffd, 1},
},
}
-var _Mn = &RangeTable{
+var _M = &RangeTable{
R16: []Range16{
{0x0300, 0x036f, 1},
- {0x0483, 0x0487, 1},
+ {0x0483, 0x0489, 1},
{0x0591, 0x05bd, 1},
{0x05bf, 0x05c1, 2},
{0x05c2, 0x05c4, 2},
@@ -367,49 +282,69 @@
{0x0825, 0x0827, 1},
{0x0829, 0x082d, 1},
{0x0859, 0x085b, 1},
- {0x0900, 0x0902, 1},
- {0x093a, 0x093c, 2},
- {0x0941, 0x0948, 1},
- {0x094d, 0x0951, 4},
- {0x0952, 0x0957, 1},
+ {0x0900, 0x0903, 1},
+ {0x093a, 0x093c, 1},
+ {0x093e, 0x094f, 1},
+ {0x0951, 0x0957, 1},
{0x0962, 0x0963, 1},
- {0x0981, 0x09bc, 59},
- {0x09c1, 0x09c4, 1},
- {0x09cd, 0x09e2, 21},
+ {0x0981, 0x0983, 1},
+ {0x09bc, 0x09be, 2},
+ {0x09bf, 0x09c4, 1},
+ {0x09c7, 0x09c8, 1},
+ {0x09cb, 0x09cd, 1},
+ {0x09d7, 0x09e2, 11},
{0x09e3, 0x0a01, 30},
- {0x0a02, 0x0a3c, 58},
- {0x0a41, 0x0a42, 1},
+ {0x0a02, 0x0a03, 1},
+ {0x0a3c, 0x0a3e, 2},
+ {0x0a3f, 0x0a42, 1},
{0x0a47, 0x0a48, 1},
{0x0a4b, 0x0a4d, 1},
{0x0a51, 0x0a70, 31},
{0x0a71, 0x0a75, 4},
- {0x0a81, 0x0a82, 1},
- {0x0abc, 0x0ac1, 5},
- {0x0ac2, 0x0ac5, 1},
- {0x0ac7, 0x0ac8, 1},
- {0x0acd, 0x0ae2, 21},
- {0x0ae3, 0x0b01, 30},
- {0x0b3c, 0x0b3f, 3},
- {0x0b41, 0x0b44, 1},
- {0x0b4d, 0x0b56, 9},
+ {0x0a81, 0x0a83, 1},
+ {0x0abc, 0x0abe, 2},
+ {0x0abf, 0x0ac5, 1},
+ {0x0ac7, 0x0ac9, 1},
+ {0x0acb, 0x0acd, 1},
+ {0x0ae2, 0x0ae3, 1},
+ {0x0b01, 0x0b03, 1},
+ {0x0b3c, 0x0b3e, 2},
+ {0x0b3f, 0x0b44, 1},
+ {0x0b47, 0x0b48, 1},
+ {0x0b4b, 0x0b4d, 1},
+ {0x0b56, 0x0b57, 1},
{0x0b62, 0x0b63, 1},
- {0x0b82, 0x0bc0, 62},
- {0x0bcd, 0x0c3e, 113},
- {0x0c3f, 0x0c40, 1},
+ {0x0b82, 0x0bbe, 60},
+ {0x0bbf, 0x0bc2, 1},
+ {0x0bc6, 0x0bc8, 1},
+ {0x0bca, 0x0bcd, 1},
+ {0x0bd7, 0x0c01, 42},
+ {0x0c02, 0x0c03, 1},
+ {0x0c3e, 0x0c44, 1},
{0x0c46, 0x0c48, 1},
{0x0c4a, 0x0c4d, 1},
{0x0c55, 0x0c56, 1},
{0x0c62, 0x0c63, 1},
- {0x0cbc, 0x0cbf, 3},
- {0x0cc6, 0x0ccc, 6},
- {0x0ccd, 0x0ce2, 21},
- {0x0ce3, 0x0d41, 94},
- {0x0d42, 0x0d44, 1},
- {0x0d4d, 0x0d62, 21},
- {0x0d63, 0x0dca, 103},
- {0x0dd2, 0x0dd4, 1},
- {0x0dd6, 0x0e31, 91},
- {0x0e34, 0x0e3a, 1},
+ {0x0c82, 0x0c83, 1},
+ {0x0cbc, 0x0cbe, 2},
+ {0x0cbf, 0x0cc4, 1},
+ {0x0cc6, 0x0cc8, 1},
+ {0x0cca, 0x0ccd, 1},
+ {0x0cd5, 0x0cd6, 1},
+ {0x0ce2, 0x0ce3, 1},
+ {0x0d02, 0x0d03, 1},
+ {0x0d3e, 0x0d44, 1},
+ {0x0d46, 0x0d48, 1},
+ {0x0d4a, 0x0d4d, 1},
+ {0x0d57, 0x0d62, 11},
+ {0x0d63, 0x0d82, 31},
+ {0x0d83, 0x0dca, 71},
+ {0x0dcf, 0x0dd4, 1},
+ {0x0dd6, 0x0dd8, 2},
+ {0x0dd9, 0x0ddf, 1},
+ {0x0df2, 0x0df3, 1},
+ {0x0e31, 0x0e34, 3},
+ {0x0e35, 0x0e3a, 1},
{0x0e47, 0x0e4e, 1},
{0x0eb1, 0x0eb4, 3},
{0x0eb5, 0x0eb9, 1},
@@ -417,94 +352,79 @@
{0x0ec8, 0x0ecd, 1},
{0x0f18, 0x0f19, 1},
{0x0f35, 0x0f39, 2},
- {0x0f71, 0x0f7e, 1},
- {0x0f80, 0x0f84, 1},
+ {0x0f3e, 0x0f3f, 1},
+ {0x0f71, 0x0f84, 1},
{0x0f86, 0x0f87, 1},
{0x0f8d, 0x0f97, 1},
{0x0f99, 0x0fbc, 1},
- {0x0fc6, 0x102d, 103},
- {0x102e, 0x1030, 1},
- {0x1032, 0x1037, 1},
- {0x1039, 0x103a, 1},
- {0x103d, 0x103e, 1},
- {0x1058, 0x1059, 1},
+ {0x0fc6, 0x102b, 101},
+ {0x102c, 0x103e, 1},
+ {0x1056, 0x1059, 1},
{0x105e, 0x1060, 1},
+ {0x1062, 0x1064, 1},
+ {0x1067, 0x106d, 1},
{0x1071, 0x1074, 1},
- {0x1082, 0x1085, 3},
- {0x1086, 0x108d, 7},
- {0x109d, 0x135d, 704},
- {0x135e, 0x135f, 1},
+ {0x1082, 0x108d, 1},
+ {0x108f, 0x109a, 11},
+ {0x109b, 0x109d, 1},
+ {0x135d, 0x135f, 1},
{0x1712, 0x1714, 1},
{0x1732, 0x1734, 1},
{0x1752, 0x1753, 1},
{0x1772, 0x1773, 1},
- {0x17b7, 0x17bd, 1},
- {0x17c6, 0x17c9, 3},
- {0x17ca, 0x17d3, 1},
+ {0x17b6, 0x17d3, 1},
{0x17dd, 0x180b, 46},
{0x180c, 0x180d, 1},
{0x18a9, 0x1920, 119},
- {0x1921, 0x1922, 1},
- {0x1927, 0x1928, 1},
- {0x1932, 0x1939, 7},
- {0x193a, 0x193b, 1},
- {0x1a17, 0x1a18, 1},
- {0x1a56, 0x1a58, 2},
- {0x1a59, 0x1a5e, 1},
- {0x1a60, 0x1a62, 2},
- {0x1a65, 0x1a6c, 1},
- {0x1a73, 0x1a7c, 1},
+ {0x1921, 0x192b, 1},
+ {0x1930, 0x193b, 1},
+ {0x19b0, 0x19c0, 1},
+ {0x19c8, 0x19c9, 1},
+ {0x1a17, 0x1a1b, 1},
+ {0x1a55, 0x1a5e, 1},
+ {0x1a60, 0x1a7c, 1},
{0x1a7f, 0x1b00, 129},
- {0x1b01, 0x1b03, 1},
- {0x1b34, 0x1b36, 2},
- {0x1b37, 0x1b3a, 1},
- {0x1b3c, 0x1b42, 6},
+ {0x1b01, 0x1b04, 1},
+ {0x1b34, 0x1b44, 1},
{0x1b6b, 0x1b73, 1},
- {0x1b80, 0x1b81, 1},
- {0x1ba2, 0x1ba5, 1},
- {0x1ba8, 0x1ba9, 1},
- {0x1be6, 0x1be8, 2},
- {0x1be9, 0x1bed, 4},
- {0x1bef, 0x1bf1, 1},
- {0x1c2c, 0x1c33, 1},
- {0x1c36, 0x1c37, 1},
+ {0x1b80, 0x1b82, 1},
+ {0x1ba1, 0x1baa, 1},
+ {0x1be6, 0x1bf3, 1},
+ {0x1c24, 0x1c37, 1},
{0x1cd0, 0x1cd2, 1},
- {0x1cd4, 0x1ce0, 1},
- {0x1ce2, 0x1ce8, 1},
- {0x1ced, 0x1dc0, 211},
- {0x1dc1, 0x1de6, 1},
+ {0x1cd4, 0x1ce8, 1},
+ {0x1ced, 0x1cf2, 5},
+ {0x1dc0, 0x1de6, 1},
{0x1dfc, 0x1dff, 1},
- {0x20d0, 0x20dc, 1},
- {0x20e1, 0x20e5, 4},
- {0x20e6, 0x20f0, 1},
+ {0x20d0, 0x20f0, 1},
{0x2cef, 0x2cf1, 1},
{0x2d7f, 0x2de0, 97},
{0x2de1, 0x2dff, 1},
{0x302a, 0x302f, 1},
{0x3099, 0x309a, 1},
- {0xa66f, 0xa67c, 13},
- {0xa67d, 0xa6f0, 115},
- {0xa6f1, 0xa802, 273},
- {0xa806, 0xa80b, 5},
- {0xa825, 0xa826, 1},
- {0xa8c4, 0xa8e0, 28},
- {0xa8e1, 0xa8f1, 1},
+ {0xa66f, 0xa672, 1},
+ {0xa67c, 0xa67d, 1},
+ {0xa6f0, 0xa6f1, 1},
+ {0xa802, 0xa806, 4},
+ {0xa80b, 0xa823, 24},
+ {0xa824, 0xa827, 1},
+ {0xa880, 0xa881, 1},
+ {0xa8b4, 0xa8c4, 1},
+ {0xa8e0, 0xa8f1, 1},
{0xa926, 0xa92d, 1},
- {0xa947, 0xa951, 1},
- {0xa980, 0xa982, 1},
- {0xa9b3, 0xa9b6, 3},
- {0xa9b7, 0xa9b9, 1},
- {0xa9bc, 0xaa29, 109},
- {0xaa2a, 0xaa2e, 1},
- {0xaa31, 0xaa32, 1},
- {0xaa35, 0xaa36, 1},
+ {0xa947, 0xa953, 1},
+ {0xa980, 0xa983, 1},
+ {0xa9b3, 0xa9c0, 1},
+ {0xaa29, 0xaa36, 1},
{0xaa43, 0xaa4c, 9},
+ {0xaa4d, 0xaa7b, 46},
{0xaab0, 0xaab2, 2},
{0xaab3, 0xaab4, 1},
{0xaab7, 0xaab8, 1},
{0xaabe, 0xaabf, 1},
- {0xaac1, 0xabe5, 292},
- {0xabe8, 0xabed, 5},
+ {0xaac1, 0xabe3, 290},
+ {0xabe4, 0xabea, 1},
+ {0xabec, 0xabed, 1},
{0xfb1e, 0xfe00, 738},
{0xfe01, 0xfe0f, 1},
{0xfe20, 0xfe26, 1},
@@ -515,12 +435,13 @@
{0x10a05, 0x10a06, 1},
{0x10a0c, 0x10a0f, 1},
{0x10a38, 0x10a3a, 1},
- {0x10a3f, 0x11001, 1474},
+ {0x10a3f, 0x11000, 1473},
+ {0x11001, 0x11002, 1},
{0x11038, 0x11046, 1},
- {0x11080, 0x11081, 1},
- {0x110b3, 0x110b6, 1},
- {0x110b9, 0x110ba, 1},
- {0x1d167, 0x1d169, 1},
+ {0x11080, 0x11082, 1},
+ {0x110b0, 0x110ba, 1},
+ {0x1d165, 0x1d169, 1},
+ {0x1d16d, 0x1d172, 1},
{0x1d17b, 0x1d182, 1},
{0x1d185, 0x1d18b, 1},
{0x1d1aa, 0x1d1ad, 1},
@@ -529,13 +450,7 @@
},
}
-var _Zl = &RangeTable{
- R16: []Range16{
- {0x2028, 0x2028, 1},
- },
-}
-
-var letter = &RangeTable{
+var _L = &RangeTable{
R16: []Range16{
{0x0041, 0x005a, 1},
{0x0061, 0x007a, 1},
@@ -956,6 +871,725 @@
},
}
+var _N = &RangeTable{
+ R16: []Range16{
+ {0x0030, 0x0039, 1},
+ {0x00b2, 0x00b3, 1},
+ {0x00b9, 0x00bc, 3},
+ {0x00bd, 0x00be, 1},
+ {0x0660, 0x0669, 1},
+ {0x06f0, 0x06f9, 1},
+ {0x07c0, 0x07c9, 1},
+ {0x0966, 0x096f, 1},
+ {0x09e6, 0x09ef, 1},
+ {0x09f4, 0x09f9, 1},
+ {0x0a66, 0x0a6f, 1},
+ {0x0ae6, 0x0aef, 1},
+ {0x0b66, 0x0b6f, 1},
+ {0x0b72, 0x0b77, 1},
+ {0x0be6, 0x0bf2, 1},
+ {0x0c66, 0x0c6f, 1},
+ {0x0c78, 0x0c7e, 1},
+ {0x0ce6, 0x0cef, 1},
+ {0x0d66, 0x0d75, 1},
+ {0x0e50, 0x0e59, 1},
+ {0x0ed0, 0x0ed9, 1},
+ {0x0f20, 0x0f33, 1},
+ {0x1040, 0x1049, 1},
+ {0x1090, 0x1099, 1},
+ {0x1369, 0x137c, 1},
+ {0x16ee, 0x16f0, 1},
+ {0x17e0, 0x17e9, 1},
+ {0x17f0, 0x17f9, 1},
+ {0x1810, 0x1819, 1},
+ {0x1946, 0x194f, 1},
+ {0x19d0, 0x19da, 1},
+ {0x1a80, 0x1a89, 1},
+ {0x1a90, 0x1a99, 1},
+ {0x1b50, 0x1b59, 1},
+ {0x1bb0, 0x1bb9, 1},
+ {0x1c40, 0x1c49, 1},
+ {0x1c50, 0x1c59, 1},
+ {0x2070, 0x2074, 4},
+ {0x2075, 0x2079, 1},
+ {0x2080, 0x2089, 1},
+ {0x2150, 0x2182, 1},
+ {0x2185, 0x2189, 1},
+ {0x2460, 0x249b, 1},
+ {0x24ea, 0x24ff, 1},
+ {0x2776, 0x2793, 1},
+ {0x2cfd, 0x3007, 778},
+ {0x3021, 0x3029, 1},
+ {0x3038, 0x303a, 1},
+ {0x3192, 0x3195, 1},
+ {0x3220, 0x3229, 1},
+ {0x3251, 0x325f, 1},
+ {0x3280, 0x3289, 1},
+ {0x32b1, 0x32bf, 1},
+ {0xa620, 0xa629, 1},
+ {0xa6e6, 0xa6ef, 1},
+ {0xa830, 0xa835, 1},
+ {0xa8d0, 0xa8d9, 1},
+ {0xa900, 0xa909, 1},
+ {0xa9d0, 0xa9d9, 1},
+ {0xaa50, 0xaa59, 1},
+ {0xabf0, 0xabf9, 1},
+ {0xff10, 0xff19, 1},
+ },
+ R32: []Range32{
+ {0x10107, 0x10133, 1},
+ {0x10140, 0x10178, 1},
+ {0x1018a, 0x10320, 406},
+ {0x10321, 0x10323, 1},
+ {0x10341, 0x1034a, 9},
+ {0x103d1, 0x103d5, 1},
+ {0x104a0, 0x104a9, 1},
+ {0x10858, 0x1085f, 1},
+ {0x10916, 0x1091b, 1},
+ {0x10a40, 0x10a47, 1},
+ {0x10a7d, 0x10a7e, 1},
+ {0x10b58, 0x10b5f, 1},
+ {0x10b78, 0x10b7f, 1},
+ {0x10e60, 0x10e7e, 1},
+ {0x11052, 0x1106f, 1},
+ {0x12400, 0x12462, 1},
+ {0x1d360, 0x1d371, 1},
+ {0x1d7ce, 0x1d7ff, 1},
+ {0x1f100, 0x1f10a, 1},
+ },
+}
+
+var _P = &RangeTable{
+ R16: []Range16{
+ {0x0021, 0x0023, 1},
+ {0x0025, 0x002a, 1},
+ {0x002c, 0x002f, 1},
+ {0x003a, 0x003b, 1},
+ {0x003f, 0x0040, 1},
+ {0x005b, 0x005d, 1},
+ {0x005f, 0x007b, 28},
+ {0x007d, 0x00a1, 36},
+ {0x00ab, 0x00b7, 12},
+ {0x00bb, 0x00bf, 4},
+ {0x037e, 0x0387, 9},
+ {0x055a, 0x055f, 1},
+ {0x0589, 0x058a, 1},
+ {0x05be, 0x05c0, 2},
+ {0x05c3, 0x05c6, 3},
+ {0x05f3, 0x05f4, 1},
+ {0x0609, 0x060a, 1},
+ {0x060c, 0x060d, 1},
+ {0x061b, 0x061e, 3},
+ {0x061f, 0x066a, 75},
+ {0x066b, 0x066d, 1},
+ {0x06d4, 0x0700, 44},
+ {0x0701, 0x070d, 1},
+ {0x07f7, 0x07f9, 1},
+ {0x0830, 0x083e, 1},
+ {0x085e, 0x0964, 262},
+ {0x0965, 0x0970, 11},
+ {0x0df4, 0x0e4f, 91},
+ {0x0e5a, 0x0e5b, 1},
+ {0x0f04, 0x0f12, 1},
+ {0x0f3a, 0x0f3d, 1},
+ {0x0f85, 0x0fd0, 75},
+ {0x0fd1, 0x0fd4, 1},
+ {0x0fd9, 0x0fda, 1},
+ {0x104a, 0x104f, 1},
+ {0x10fb, 0x1361, 614},
+ {0x1362, 0x1368, 1},
+ {0x1400, 0x166d, 621},
+ {0x166e, 0x169b, 45},
+ {0x169c, 0x16eb, 79},
+ {0x16ec, 0x16ed, 1},
+ {0x1735, 0x1736, 1},
+ {0x17d4, 0x17d6, 1},
+ {0x17d8, 0x17da, 1},
+ {0x1800, 0x180a, 1},
+ {0x1944, 0x1945, 1},
+ {0x1a1e, 0x1a1f, 1},
+ {0x1aa0, 0x1aa6, 1},
+ {0x1aa8, 0x1aad, 1},
+ {0x1b5a, 0x1b60, 1},
+ {0x1bfc, 0x1bff, 1},
+ {0x1c3b, 0x1c3f, 1},
+ {0x1c7e, 0x1c7f, 1},
+ {0x1cd3, 0x2010, 829},
+ {0x2011, 0x2027, 1},
+ {0x2030, 0x2043, 1},
+ {0x2045, 0x2051, 1},
+ {0x2053, 0x205e, 1},
+ {0x207d, 0x207e, 1},
+ {0x208d, 0x208e, 1},
+ {0x2329, 0x232a, 1},
+ {0x2768, 0x2775, 1},
+ {0x27c5, 0x27c6, 1},
+ {0x27e6, 0x27ef, 1},
+ {0x2983, 0x2998, 1},
+ {0x29d8, 0x29db, 1},
+ {0x29fc, 0x29fd, 1},
+ {0x2cf9, 0x2cfc, 1},
+ {0x2cfe, 0x2cff, 1},
+ {0x2d70, 0x2e00, 144},
+ {0x2e01, 0x2e2e, 1},
+ {0x2e30, 0x2e31, 1},
+ {0x3001, 0x3003, 1},
+ {0x3008, 0x3011, 1},
+ {0x3014, 0x301f, 1},
+ {0x3030, 0x303d, 13},
+ {0x30a0, 0x30fb, 91},
+ {0xa4fe, 0xa4ff, 1},
+ {0xa60d, 0xa60f, 1},
+ {0xa673, 0xa67e, 11},
+ {0xa6f2, 0xa6f7, 1},
+ {0xa874, 0xa877, 1},
+ {0xa8ce, 0xa8cf, 1},
+ {0xa8f8, 0xa8fa, 1},
+ {0xa92e, 0xa92f, 1},
+ {0xa95f, 0xa9c1, 98},
+ {0xa9c2, 0xa9cd, 1},
+ {0xa9de, 0xa9df, 1},
+ {0xaa5c, 0xaa5f, 1},
+ {0xaade, 0xaadf, 1},
+ {0xabeb, 0xfd3e, 20819},
+ {0xfd3f, 0xfe10, 209},
+ {0xfe11, 0xfe19, 1},
+ {0xfe30, 0xfe52, 1},
+ {0xfe54, 0xfe61, 1},
+ {0xfe63, 0xfe68, 5},
+ {0xfe6a, 0xfe6b, 1},
+ {0xff01, 0xff03, 1},
+ {0xff05, 0xff0a, 1},
+ {0xff0c, 0xff0f, 1},
+ {0xff1a, 0xff1b, 1},
+ {0xff1f, 0xff20, 1},
+ {0xff3b, 0xff3d, 1},
+ {0xff3f, 0xff5b, 28},
+ {0xff5d, 0xff5f, 2},
+ {0xff60, 0xff65, 1},
+ },
+ R32: []Range32{
+ {0x10100, 0x10101, 1},
+ {0x1039f, 0x103d0, 49},
+ {0x10857, 0x1091f, 200},
+ {0x1093f, 0x10a50, 273},
+ {0x10a51, 0x10a58, 1},
+ {0x10a7f, 0x10b39, 186},
+ {0x10b3a, 0x10b3f, 1},
+ {0x11047, 0x1104d, 1},
+ {0x110bb, 0x110bc, 1},
+ {0x110be, 0x110c1, 1},
+ {0x12470, 0x12473, 1},
+ },
+}
+
+var _S = &RangeTable{
+ R16: []Range16{
+ {0x0024, 0x002b, 7},
+ {0x003c, 0x003e, 1},
+ {0x005e, 0x0060, 2},
+ {0x007c, 0x007e, 2},
+ {0x00a2, 0x00a9, 1},
+ {0x00ac, 0x00ae, 2},
+ {0x00af, 0x00b1, 1},
+ {0x00b4, 0x00b8, 2},
+ {0x00d7, 0x00f7, 32},
+ {0x02c2, 0x02c5, 1},
+ {0x02d2, 0x02df, 1},
+ {0x02e5, 0x02eb, 1},
+ {0x02ed, 0x02ef, 2},
+ {0x02f0, 0x02ff, 1},
+ {0x0375, 0x0384, 15},
+ {0x0385, 0x03f6, 113},
+ {0x0482, 0x0606, 388},
+ {0x0607, 0x0608, 1},
+ {0x060b, 0x060e, 3},
+ {0x060f, 0x06de, 207},
+ {0x06e9, 0x06fd, 20},
+ {0x06fe, 0x07f6, 248},
+ {0x09f2, 0x09f3, 1},
+ {0x09fa, 0x09fb, 1},
+ {0x0af1, 0x0b70, 127},
+ {0x0bf3, 0x0bfa, 1},
+ {0x0c7f, 0x0d79, 250},
+ {0x0e3f, 0x0f01, 194},
+ {0x0f02, 0x0f03, 1},
+ {0x0f13, 0x0f17, 1},
+ {0x0f1a, 0x0f1f, 1},
+ {0x0f34, 0x0f38, 2},
+ {0x0fbe, 0x0fc5, 1},
+ {0x0fc7, 0x0fcc, 1},
+ {0x0fce, 0x0fcf, 1},
+ {0x0fd5, 0x0fd8, 1},
+ {0x109e, 0x109f, 1},
+ {0x1360, 0x1390, 48},
+ {0x1391, 0x1399, 1},
+ {0x17db, 0x1940, 357},
+ {0x19de, 0x19ff, 1},
+ {0x1b61, 0x1b6a, 1},
+ {0x1b74, 0x1b7c, 1},
+ {0x1fbd, 0x1fbf, 2},
+ {0x1fc0, 0x1fc1, 1},
+ {0x1fcd, 0x1fcf, 1},
+ {0x1fdd, 0x1fdf, 1},
+ {0x1fed, 0x1fef, 1},
+ {0x1ffd, 0x1ffe, 1},
+ {0x2044, 0x2052, 14},
+ {0x207a, 0x207c, 1},
+ {0x208a, 0x208c, 1},
+ {0x20a0, 0x20b9, 1},
+ {0x2100, 0x2101, 1},
+ {0x2103, 0x2106, 1},
+ {0x2108, 0x2109, 1},
+ {0x2114, 0x2116, 2},
+ {0x2117, 0x2118, 1},
+ {0x211e, 0x2123, 1},
+ {0x2125, 0x2129, 2},
+ {0x212e, 0x213a, 12},
+ {0x213b, 0x2140, 5},
+ {0x2141, 0x2144, 1},
+ {0x214a, 0x214d, 1},
+ {0x214f, 0x2190, 65},
+ {0x2191, 0x2328, 1},
+ {0x232b, 0x23f3, 1},
+ {0x2400, 0x2426, 1},
+ {0x2440, 0x244a, 1},
+ {0x249c, 0x24e9, 1},
+ {0x2500, 0x26ff, 1},
+ {0x2701, 0x2767, 1},
+ {0x2794, 0x27c4, 1},
+ {0x27c7, 0x27ca, 1},
+ {0x27cc, 0x27ce, 2},
+ {0x27cf, 0x27e5, 1},
+ {0x27f0, 0x2982, 1},
+ {0x2999, 0x29d7, 1},
+ {0x29dc, 0x29fb, 1},
+ {0x29fe, 0x2b4c, 1},
+ {0x2b50, 0x2b59, 1},
+ {0x2ce5, 0x2cea, 1},
+ {0x2e80, 0x2e99, 1},
+ {0x2e9b, 0x2ef3, 1},
+ {0x2f00, 0x2fd5, 1},
+ {0x2ff0, 0x2ffb, 1},
+ {0x3004, 0x3012, 14},
+ {0x3013, 0x3020, 13},
+ {0x3036, 0x3037, 1},
+ {0x303e, 0x303f, 1},
+ {0x309b, 0x309c, 1},
+ {0x3190, 0x3191, 1},
+ {0x3196, 0x319f, 1},
+ {0x31c0, 0x31e3, 1},
+ {0x3200, 0x321e, 1},
+ {0x322a, 0x3250, 1},
+ {0x3260, 0x327f, 1},
+ {0x328a, 0x32b0, 1},
+ {0x32c0, 0x32fe, 1},
+ {0x3300, 0x33ff, 1},
+ {0x4dc0, 0x4dff, 1},
+ {0xa490, 0xa4c6, 1},
+ {0xa700, 0xa716, 1},
+ {0xa720, 0xa721, 1},
+ {0xa789, 0xa78a, 1},
+ {0xa828, 0xa82b, 1},
+ {0xa836, 0xa839, 1},
+ {0xaa77, 0xaa79, 1},
+ {0xfb29, 0xfbb2, 137},
+ {0xfbb3, 0xfbc1, 1},
+ {0xfdfc, 0xfdfd, 1},
+ {0xfe62, 0xfe64, 2},
+ {0xfe65, 0xfe66, 1},
+ {0xfe69, 0xff04, 155},
+ {0xff0b, 0xff1c, 17},
+ {0xff1d, 0xff1e, 1},
+ {0xff3e, 0xff40, 2},
+ {0xff5c, 0xff5e, 2},
+ {0xffe0, 0xffe6, 1},
+ {0xffe8, 0xffee, 1},
+ {0xfffc, 0xfffd, 1},
+ },
+ R32: []Range32{
+ {0x10102, 0x10137, 53},
+ {0x10138, 0x1013f, 1},
+ {0x10179, 0x10189, 1},
+ {0x10190, 0x1019b, 1},
+ {0x101d0, 0x101fc, 1},
+ {0x1d000, 0x1d0f5, 1},
+ {0x1d100, 0x1d126, 1},
+ {0x1d129, 0x1d164, 1},
+ {0x1d16a, 0x1d16c, 1},
+ {0x1d183, 0x1d184, 1},
+ {0x1d18c, 0x1d1a9, 1},
+ {0x1d1ae, 0x1d1dd, 1},
+ {0x1d200, 0x1d241, 1},
+ {0x1d245, 0x1d300, 187},
+ {0x1d301, 0x1d356, 1},
+ {0x1d6c1, 0x1d6db, 26},
+ {0x1d6fb, 0x1d715, 26},
+ {0x1d735, 0x1d74f, 26},
+ {0x1d76f, 0x1d789, 26},
+ {0x1d7a9, 0x1d7c3, 26},
+ {0x1f000, 0x1f02b, 1},
+ {0x1f030, 0x1f093, 1},
+ {0x1f0a0, 0x1f0ae, 1},
+ {0x1f0b1, 0x1f0be, 1},
+ {0x1f0c1, 0x1f0cf, 1},
+ {0x1f0d1, 0x1f0df, 1},
+ {0x1f110, 0x1f12e, 1},
+ {0x1f130, 0x1f169, 1},
+ {0x1f170, 0x1f19a, 1},
+ {0x1f1e6, 0x1f202, 1},
+ {0x1f210, 0x1f23a, 1},
+ {0x1f240, 0x1f248, 1},
+ {0x1f250, 0x1f251, 1},
+ {0x1f300, 0x1f320, 1},
+ {0x1f330, 0x1f335, 1},
+ {0x1f337, 0x1f37c, 1},
+ {0x1f380, 0x1f393, 1},
+ {0x1f3a0, 0x1f3c4, 1},
+ {0x1f3c6, 0x1f3ca, 1},
+ {0x1f3e0, 0x1f3f0, 1},
+ {0x1f400, 0x1f43e, 1},
+ {0x1f440, 0x1f442, 2},
+ {0x1f443, 0x1f4f7, 1},
+ {0x1f4f9, 0x1f4fc, 1},
+ {0x1f500, 0x1f53d, 1},
+ {0x1f550, 0x1f567, 1},
+ {0x1f5fb, 0x1f5ff, 1},
+ {0x1f601, 0x1f610, 1},
+ {0x1f612, 0x1f614, 1},
+ {0x1f616, 0x1f61c, 2},
+ {0x1f61d, 0x1f61e, 1},
+ {0x1f620, 0x1f625, 1},
+ {0x1f628, 0x1f62b, 1},
+ {0x1f62d, 0x1f630, 3},
+ {0x1f631, 0x1f633, 1},
+ {0x1f635, 0x1f640, 1},
+ {0x1f645, 0x1f64f, 1},
+ {0x1f680, 0x1f6c5, 1},
+ {0x1f700, 0x1f773, 1},
+ },
+}
+
+var _Z = &RangeTable{
+ R16: []Range16{
+ {0x0020, 0x00a0, 128},
+ {0x1680, 0x180e, 398},
+ {0x2000, 0x200a, 1},
+ {0x2028, 0x2029, 1},
+ {0x202f, 0x205f, 48},
+ {0x3000, 0x3000, 1},
+ },
+}
+
+var _Me = &RangeTable{
+ R16: []Range16{
+ {0x0488, 0x0489, 1},
+ {0x20dd, 0x20e0, 1},
+ {0x20e2, 0x20e4, 1},
+ {0xa670, 0xa672, 1},
+ },
+}
+
+var _Mc = &RangeTable{
+ R16: []Range16{
+ {0x0903, 0x093b, 56},
+ {0x093e, 0x0940, 1},
+ {0x0949, 0x094c, 1},
+ {0x094e, 0x094f, 1},
+ {0x0982, 0x0983, 1},
+ {0x09be, 0x09c0, 1},
+ {0x09c7, 0x09c8, 1},
+ {0x09cb, 0x09cc, 1},
+ {0x09d7, 0x0a03, 44},
+ {0x0a3e, 0x0a40, 1},
+ {0x0a83, 0x0abe, 59},
+ {0x0abf, 0x0ac0, 1},
+ {0x0ac9, 0x0acb, 2},
+ {0x0acc, 0x0b02, 54},
+ {0x0b03, 0x0b3e, 59},
+ {0x0b40, 0x0b47, 7},
+ {0x0b48, 0x0b4b, 3},
+ {0x0b4c, 0x0b57, 11},
+ {0x0bbe, 0x0bbf, 1},
+ {0x0bc1, 0x0bc2, 1},
+ {0x0bc6, 0x0bc8, 1},
+ {0x0bca, 0x0bcc, 1},
+ {0x0bd7, 0x0c01, 42},
+ {0x0c02, 0x0c03, 1},
+ {0x0c41, 0x0c44, 1},
+ {0x0c82, 0x0c83, 1},
+ {0x0cbe, 0x0cc0, 2},
+ {0x0cc1, 0x0cc4, 1},
+ {0x0cc7, 0x0cc8, 1},
+ {0x0cca, 0x0ccb, 1},
+ {0x0cd5, 0x0cd6, 1},
+ {0x0d02, 0x0d03, 1},
+ {0x0d3e, 0x0d40, 1},
+ {0x0d46, 0x0d48, 1},
+ {0x0d4a, 0x0d4c, 1},
+ {0x0d57, 0x0d82, 43},
+ {0x0d83, 0x0dcf, 76},
+ {0x0dd0, 0x0dd1, 1},
+ {0x0dd8, 0x0ddf, 1},
+ {0x0df2, 0x0df3, 1},
+ {0x0f3e, 0x0f3f, 1},
+ {0x0f7f, 0x102b, 172},
+ {0x102c, 0x1031, 5},
+ {0x1038, 0x103b, 3},
+ {0x103c, 0x1056, 26},
+ {0x1057, 0x1062, 11},
+ {0x1063, 0x1064, 1},
+ {0x1067, 0x106d, 1},
+ {0x1083, 0x1084, 1},
+ {0x1087, 0x108c, 1},
+ {0x108f, 0x109a, 11},
+ {0x109b, 0x109c, 1},
+ {0x17b6, 0x17be, 8},
+ {0x17bf, 0x17c5, 1},
+ {0x17c7, 0x17c8, 1},
+ {0x1923, 0x1926, 1},
+ {0x1929, 0x192b, 1},
+ {0x1930, 0x1931, 1},
+ {0x1933, 0x1938, 1},
+ {0x19b0, 0x19c0, 1},
+ {0x19c8, 0x19c9, 1},
+ {0x1a19, 0x1a1b, 1},
+ {0x1a55, 0x1a57, 2},
+ {0x1a61, 0x1a63, 2},
+ {0x1a64, 0x1a6d, 9},
+ {0x1a6e, 0x1a72, 1},
+ {0x1b04, 0x1b35, 49},
+ {0x1b3b, 0x1b3d, 2},
+ {0x1b3e, 0x1b41, 1},
+ {0x1b43, 0x1b44, 1},
+ {0x1b82, 0x1ba1, 31},
+ {0x1ba6, 0x1ba7, 1},
+ {0x1baa, 0x1be7, 61},
+ {0x1bea, 0x1bec, 1},
+ {0x1bee, 0x1bf2, 4},
+ {0x1bf3, 0x1c24, 49},
+ {0x1c25, 0x1c2b, 1},
+ {0x1c34, 0x1c35, 1},
+ {0x1ce1, 0x1cf2, 17},
+ {0xa823, 0xa824, 1},
+ {0xa827, 0xa880, 89},
+ {0xa881, 0xa8b4, 51},
+ {0xa8b5, 0xa8c3, 1},
+ {0xa952, 0xa953, 1},
+ {0xa983, 0xa9b4, 49},
+ {0xa9b5, 0xa9ba, 5},
+ {0xa9bb, 0xa9bd, 2},
+ {0xa9be, 0xa9c0, 1},
+ {0xaa2f, 0xaa30, 1},
+ {0xaa33, 0xaa34, 1},
+ {0xaa4d, 0xaa7b, 46},
+ {0xabe3, 0xabe4, 1},
+ {0xabe6, 0xabe7, 1},
+ {0xabe9, 0xabea, 1},
+ {0xabec, 0xabec, 1},
+ },
+ R32: []Range32{
+ {0x11000, 0x11000, 1},
+ {0x11002, 0x11082, 128},
+ {0x110b0, 0x110b2, 1},
+ {0x110b7, 0x110b8, 1},
+ {0x1d165, 0x1d166, 1},
+ {0x1d16d, 0x1d172, 1},
+ },
+}
+
+var _Mn = &RangeTable{
+ R16: []Range16{
+ {0x0300, 0x036f, 1},
+ {0x0483, 0x0487, 1},
+ {0x0591, 0x05bd, 1},
+ {0x05bf, 0x05c1, 2},
+ {0x05c2, 0x05c4, 2},
+ {0x05c5, 0x05c7, 2},
+ {0x0610, 0x061a, 1},
+ {0x064b, 0x065f, 1},
+ {0x0670, 0x06d6, 102},
+ {0x06d7, 0x06dc, 1},
+ {0x06df, 0x06e4, 1},
+ {0x06e7, 0x06e8, 1},
+ {0x06ea, 0x06ed, 1},
+ {0x0711, 0x0730, 31},
+ {0x0731, 0x074a, 1},
+ {0x07a6, 0x07b0, 1},
+ {0x07eb, 0x07f3, 1},
+ {0x0816, 0x0819, 1},
+ {0x081b, 0x0823, 1},
+ {0x0825, 0x0827, 1},
+ {0x0829, 0x082d, 1},
+ {0x0859, 0x085b, 1},
+ {0x0900, 0x0902, 1},
+ {0x093a, 0x093c, 2},
+ {0x0941, 0x0948, 1},
+ {0x094d, 0x0951, 4},
+ {0x0952, 0x0957, 1},
+ {0x0962, 0x0963, 1},
+ {0x0981, 0x09bc, 59},
+ {0x09c1, 0x09c4, 1},
+ {0x09cd, 0x09e2, 21},
+ {0x09e3, 0x0a01, 30},
+ {0x0a02, 0x0a3c, 58},
+ {0x0a41, 0x0a42, 1},
+ {0x0a47, 0x0a48, 1},
+ {0x0a4b, 0x0a4d, 1},
+ {0x0a51, 0x0a70, 31},
+ {0x0a71, 0x0a75, 4},
+ {0x0a81, 0x0a82, 1},
+ {0x0abc, 0x0ac1, 5},
+ {0x0ac2, 0x0ac5, 1},
+ {0x0ac7, 0x0ac8, 1},
+ {0x0acd, 0x0ae2, 21},
+ {0x0ae3, 0x0b01, 30},
+ {0x0b3c, 0x0b3f, 3},
+ {0x0b41, 0x0b44, 1},
+ {0x0b4d, 0x0b56, 9},
+ {0x0b62, 0x0b63, 1},
+ {0x0b82, 0x0bc0, 62},
+ {0x0bcd, 0x0c3e, 113},
+ {0x0c3f, 0x0c40, 1},
+ {0x0c46, 0x0c48, 1},
+ {0x0c4a, 0x0c4d, 1},
+ {0x0c55, 0x0c56, 1},
+ {0x0c62, 0x0c63, 1},
+ {0x0cbc, 0x0cbf, 3},
+ {0x0cc6, 0x0ccc, 6},
+ {0x0ccd, 0x0ce2, 21},
+ {0x0ce3, 0x0d41, 94},
+ {0x0d42, 0x0d44, 1},
+ {0x0d4d, 0x0d62, 21},
+ {0x0d63, 0x0dca, 103},
+ {0x0dd2, 0x0dd4, 1},
+ {0x0dd6, 0x0e31, 91},
+ {0x0e34, 0x0e3a, 1},
+ {0x0e47, 0x0e4e, 1},
+ {0x0eb1, 0x0eb4, 3},
+ {0x0eb5, 0x0eb9, 1},
+ {0x0ebb, 0x0ebc, 1},
+ {0x0ec8, 0x0ecd, 1},
+ {0x0f18, 0x0f19, 1},
+ {0x0f35, 0x0f39, 2},
+ {0x0f71, 0x0f7e, 1},
+ {0x0f80, 0x0f84, 1},
+ {0x0f86, 0x0f87, 1},
+ {0x0f8d, 0x0f97, 1},
+ {0x0f99, 0x0fbc, 1},
+ {0x0fc6, 0x102d, 103},
+ {0x102e, 0x1030, 1},
+ {0x1032, 0x1037, 1},
+ {0x1039, 0x103a, 1},
+ {0x103d, 0x103e, 1},
+ {0x1058, 0x1059, 1},
+ {0x105e, 0x1060, 1},
+ {0x1071, 0x1074, 1},
+ {0x1082, 0x1085, 3},
+ {0x1086, 0x108d, 7},
+ {0x109d, 0x135d, 704},
+ {0x135e, 0x135f, 1},
+ {0x1712, 0x1714, 1},
+ {0x1732, 0x1734, 1},
+ {0x1752, 0x1753, 1},
+ {0x1772, 0x1773, 1},
+ {0x17b7, 0x17bd, 1},
+ {0x17c6, 0x17c9, 3},
+ {0x17ca, 0x17d3, 1},
+ {0x17dd, 0x180b, 46},
+ {0x180c, 0x180d, 1},
+ {0x18a9, 0x1920, 119},
+ {0x1921, 0x1922, 1},
+ {0x1927, 0x1928, 1},
+ {0x1932, 0x1939, 7},
+ {0x193a, 0x193b, 1},
+ {0x1a17, 0x1a18, 1},
+ {0x1a56, 0x1a58, 2},
+ {0x1a59, 0x1a5e, 1},
+ {0x1a60, 0x1a62, 2},
+ {0x1a65, 0x1a6c, 1},
+ {0x1a73, 0x1a7c, 1},
+ {0x1a7f, 0x1b00, 129},
+ {0x1b01, 0x1b03, 1},
+ {0x1b34, 0x1b36, 2},
+ {0x1b37, 0x1b3a, 1},
+ {0x1b3c, 0x1b42, 6},
+ {0x1b6b, 0x1b73, 1},
+ {0x1b80, 0x1b81, 1},
+ {0x1ba2, 0x1ba5, 1},
+ {0x1ba8, 0x1ba9, 1},
+ {0x1be6, 0x1be8, 2},
+ {0x1be9, 0x1bed, 4},
+ {0x1bef, 0x1bf1, 1},
+ {0x1c2c, 0x1c33, 1},
+ {0x1c36, 0x1c37, 1},
+ {0x1cd0, 0x1cd2, 1},
+ {0x1cd4, 0x1ce0, 1},
+ {0x1ce2, 0x1ce8, 1},
+ {0x1ced, 0x1dc0, 211},
+ {0x1dc1, 0x1de6, 1},
+ {0x1dfc, 0x1dff, 1},
+ {0x20d0, 0x20dc, 1},
+ {0x20e1, 0x20e5, 4},
+ {0x20e6, 0x20f0, 1},
+ {0x2cef, 0x2cf1, 1},
+ {0x2d7f, 0x2de0, 97},
+ {0x2de1, 0x2dff, 1},
+ {0x302a, 0x302f, 1},
+ {0x3099, 0x309a, 1},
+ {0xa66f, 0xa67c, 13},
+ {0xa67d, 0xa6f0, 115},
+ {0xa6f1, 0xa802, 273},
+ {0xa806, 0xa80b, 5},
+ {0xa825, 0xa826, 1},
+ {0xa8c4, 0xa8e0, 28},
+ {0xa8e1, 0xa8f1, 1},
+ {0xa926, 0xa92d, 1},
+ {0xa947, 0xa951, 1},
+ {0xa980, 0xa982, 1},
+ {0xa9b3, 0xa9b6, 3},
+ {0xa9b7, 0xa9b9, 1},
+ {0xa9bc, 0xaa29, 109},
+ {0xaa2a, 0xaa2e, 1},
+ {0xaa31, 0xaa32, 1},
+ {0xaa35, 0xaa36, 1},
+ {0xaa43, 0xaa4c, 9},
+ {0xaab0, 0xaab2, 2},
+ {0xaab3, 0xaab4, 1},
+ {0xaab7, 0xaab8, 1},
+ {0xaabe, 0xaabf, 1},
+ {0xaac1, 0xabe5, 292},
+ {0xabe8, 0xabed, 5},
+ {0xfb1e, 0xfe00, 738},
+ {0xfe01, 0xfe0f, 1},
+ {0xfe20, 0xfe26, 1},
+ },
+ R32: []Range32{
+ {0x101fd, 0x10a01, 2052},
+ {0x10a02, 0x10a03, 1},
+ {0x10a05, 0x10a06, 1},
+ {0x10a0c, 0x10a0f, 1},
+ {0x10a38, 0x10a3a, 1},
+ {0x10a3f, 0x11001, 1474},
+ {0x11038, 0x11046, 1},
+ {0x11080, 0x11081, 1},
+ {0x110b3, 0x110b6, 1},
+ {0x110b9, 0x110ba, 1},
+ {0x1d167, 0x1d169, 1},
+ {0x1d17b, 0x1d182, 1},
+ {0x1d185, 0x1d18b, 1},
+ {0x1d1aa, 0x1d1ad, 1},
+ {0x1d242, 0x1d244, 1},
+ {0xe0100, 0xe01ef, 1},
+ },
+}
+
+var _Zl = &RangeTable{
+ R16: []Range16{
+ {0x2028, 0x2028, 1},
+ },
+}
+
var _Zp = &RangeTable{
R16: []Range16{
{0x2029, 0x2029, 1},
@@ -2068,40 +2702,53 @@
}
var (
- Cc = _Cc // Cc is the set of Unicode characters in category Cc.
- Cf = _Cf // Cf is the set of Unicode characters in category Cf.
- Co = _Co // Co is the set of Unicode characters in category Co.
- Cs = _Cs // Cs is the set of Unicode characters in category Cs.
- Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property.
- Nd = _Nd // Nd is the set of Unicode characters in category Nd.
- Letter = letter // Letter is the set of Unicode letters.
- Lm = _Lm // Lm is the set of Unicode characters in category Lm.
- Lo = _Lo // Lo is the set of Unicode characters in category Lo.
- Lower = _Ll // Lower is the set of Unicode lower case letters.
- Ll = _Ll // Ll is the set of Unicode characters in category Ll.
- Mc = _Mc // Mc is the set of Unicode characters in category Mc.
- Me = _Me // Me is the set of Unicode characters in category Me.
- Mn = _Mn // Mn is the set of Unicode characters in category Mn.
- Nl = _Nl // Nl is the set of Unicode characters in category Nl.
- No = _No // No is the set of Unicode characters in category No.
- Pc = _Pc // Pc is the set of Unicode characters in category Pc.
- Pd = _Pd // Pd is the set of Unicode characters in category Pd.
- Pe = _Pe // Pe is the set of Unicode characters in category Pe.
- Pf = _Pf // Pf is the set of Unicode characters in category Pf.
- Pi = _Pi // Pi is the set of Unicode characters in category Pi.
- Po = _Po // Po is the set of Unicode characters in category Po.
- Ps = _Ps // Ps is the set of Unicode characters in category Ps.
- Sc = _Sc // Sc is the set of Unicode characters in category Sc.
- Sk = _Sk // Sk is the set of Unicode characters in category Sk.
- Sm = _Sm // Sm is the set of Unicode characters in category Sm.
- So = _So // So is the set of Unicode characters in category So.
- Title = _Lt // Title is the set of Unicode title case letters.
- Lt = _Lt // Lt is the set of Unicode characters in category Lt.
- Upper = _Lu // Upper is the set of Unicode upper case letters.
- Lu = _Lu // Lu is the set of Unicode characters in category Lu.
- Zl = _Zl // Zl is the set of Unicode characters in category Zl.
- Zp = _Zp // Zp is the set of Unicode characters in category Zp.
- Zs = _Zs // Zs is the set of Unicode characters in category Zs.
+ Cc = _Cc // Cc is the set of Unicode characters in category Cc.
+ Cf = _Cf // Cf is the set of Unicode characters in category Cf.
+ Co = _Co // Co is the set of Unicode characters in category Co.
+ Cs = _Cs // Cs is the set of Unicode characters in category Cs.
+ Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property.
+ Nd = _Nd // Nd is the set of Unicode characters in category Nd.
+ Letter = _L // Letter/L is the set of Unicode letters, category L.
+ L = _L
+ Lm = _Lm // Lm is the set of Unicode characters in category Lm.
+ Lo = _Lo // Lo is the set of Unicode characters in category Lo.
+ Lower = _Ll // Lower is the set of Unicode lower case letters.
+ Ll = _Ll // Ll is the set of Unicode characters in category Ll.
+ Mark = _M // Mark/M is the set of Unicode mark characters, category M.
+ M = _M
+ Mc = _Mc // Mc is the set of Unicode characters in category Mc.
+ Me = _Me // Me is the set of Unicode characters in category Me.
+ Mn = _Mn // Mn is the set of Unicode characters in category Mn.
+ Nl = _Nl // Nl is the set of Unicode characters in category Nl.
+ No = _No // No is the set of Unicode characters in category No.
+ Number = _N // Number/N is the set of Unicode number characters, category N.
+ N = _N
+ Other = _C // Other/C is the set of Unicode control and special characters, category C.
+ C = _C
+ Pc = _Pc // Pc is the set of Unicode characters in category Pc.
+ Pd = _Pd // Pd is the set of Unicode characters in category Pd.
+ Pe = _Pe // Pe is the set of Unicode characters in category Pe.
+ Pf = _Pf // Pf is the set of Unicode characters in category Pf.
+ Pi = _Pi // Pi is the set of Unicode characters in category Pi.
+ Po = _Po // Po is the set of Unicode characters in category Po.
+ Ps = _Ps // Ps is the set of Unicode characters in category Ps.
+ Punct = _P // Punct/P is the set of Unicode punctuation characters, category P.
+ P = _P
+ Sc = _Sc // Sc is the set of Unicode characters in category Sc.
+ Sk = _Sk // Sk is the set of Unicode characters in category Sk.
+ Sm = _Sm // Sm is the set of Unicode characters in category Sm.
+ So = _So // So is the set of Unicode characters in category So.
+ Space = _Z // Space/Z is the set of Unicode space characters, category Z.
+ Z = _Z
+ Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S.
+ S = _S
+ Title = _Lt // Title is the set of Unicode title case letters.
+ Lt = _Lt // Lt is the set of Unicode characters in category Lt.
+ Upper = _Lu // Upper is the set of Unicode upper case letters.
+ Lu = _Lu // Lu is the set of Unicode characters in category Lu.
+ Zl = _Zl // Zl is the set of Unicode characters in category Zl.
+ Zp = _Zp // Zp is the set of Unicode characters in category Zp.
+ Zs = _Zs // Zs is the set of Unicode characters in category Zs.
)
// Generated by running
@@ -4764,6 +5411,264 @@
{0x10400, 0x10427, d{0, 40, 0}},
{0x10428, 0x1044F, d{-40, 0, -40}},
}
+var properties = [Latin1Max]uint8{
+ 0x00: pC, // '\x00'
+ 0x01: pC, // '\x01'
+ 0x02: pC, // '\x02'
+ 0x03: pC, // '\x03'
+ 0x04: pC, // '\x04'
+ 0x05: pC, // '\x05'
+ 0x06: pC, // '\x06'
+ 0x07: pC, // '\a'
+ 0x08: pC, // '\b'
+ 0x09: pC, // '\t'
+ 0x0A: pC, // '\n'
+ 0x0B: pC, // '\v'
+ 0x0C: pC, // '\f'
+ 0x0D: pC, // '\r'
+ 0x0E: pC, // '\x0e'
+ 0x0F: pC, // '\x0f'
+ 0x10: pC, // '\x10'
+ 0x11: pC, // '\x11'
+ 0x12: pC, // '\x12'
+ 0x13: pC, // '\x13'
+ 0x14: pC, // '\x14'
+ 0x15: pC, // '\x15'
+ 0x16: pC, // '\x16'
+ 0x17: pC, // '\x17'
+ 0x18: pC, // '\x18'
+ 0x19: pC, // '\x19'
+ 0x1A: pC, // '\x1a'
+ 0x1B: pC, // '\x1b'
+ 0x1C: pC, // '\x1c'
+ 0x1D: pC, // '\x1d'
+ 0x1E: pC, // '\x1e'
+ 0x1F: pC, // '\x1f'
+ 0x20: pZ | pp, // ' '
+ 0x21: pP | pp, // '!'
+ 0x22: pP | pp, // '"'
+ 0x23: pP | pp, // '#'
+ 0x24: pS | pp, // '$'
+ 0x25: pP | pp, // '%'
+ 0x26: pP | pp, // '&'
+ 0x27: pP | pp, // '\''
+ 0x28: pP | pp, // '('
+ 0x29: pP | pp, // ')'
+ 0x2A: pP | pp, // '*'
+ 0x2B: pS | pp, // '+'
+ 0x2C: pP | pp, // ','
+ 0x2D: pP | pp, // '-'
+ 0x2E: pP | pp, // '.'
+ 0x2F: pP | pp, // '/'
+ 0x30: pN | pp, // '0'
+ 0x31: pN | pp, // '1'
+ 0x32: pN | pp, // '2'
+ 0x33: pN | pp, // '3'
+ 0x34: pN | pp, // '4'
+ 0x35: pN | pp, // '5'
+ 0x36: pN | pp, // '6'
+ 0x37: pN | pp, // '7'
+ 0x38: pN | pp, // '8'
+ 0x39: pN | pp, // '9'
+ 0x3A: pP | pp, // ':'
+ 0x3B: pP | pp, // ';'
+ 0x3C: pS | pp, // '<'
+ 0x3D: pS | pp, // '='
+ 0x3E: pS | pp, // '>'
+ 0x3F: pP | pp, // '?'
+ 0x40: pP | pp, // '@'
+ 0x41: pLu | pp, // 'A'
+ 0x42: pLu | pp, // 'B'
+ 0x43: pLu | pp, // 'C'
+ 0x44: pLu | pp, // 'D'
+ 0x45: pLu | pp, // 'E'
+ 0x46: pLu | pp, // 'F'
+ 0x47: pLu | pp, // 'G'
+ 0x48: pLu | pp, // 'H'
+ 0x49: pLu | pp, // 'I'
+ 0x4A: pLu | pp, // 'J'
+ 0x4B: pLu | pp, // 'K'
+ 0x4C: pLu | pp, // 'L'
+ 0x4D: pLu | pp, // 'M'
+ 0x4E: pLu | pp, // 'N'
+ 0x4F: pLu | pp, // 'O'
+ 0x50: pLu | pp, // 'P'
+ 0x51: pLu | pp, // 'Q'
+ 0x52: pLu | pp, // 'R'
+ 0x53: pLu | pp, // 'S'
+ 0x54: pLu | pp, // 'T'
+ 0x55: pLu | pp, // 'U'
+ 0x56: pLu | pp, // 'V'
+ 0x57: pLu | pp, // 'W'
+ 0x58: pLu | pp, // 'X'
+ 0x59: pLu | pp, // 'Y'
+ 0x5A: pLu | pp, // 'Z'
+ 0x5B: pP | pp, // '['
+ 0x5C: pP | pp, // '\\'
+ 0x5D: pP | pp, // ']'
+ 0x5E: pS | pp, // '^'
+ 0x5F: pP | pp, // '_'
+ 0x60: pS | pp, // '`'
+ 0x61: pLl | pp, // 'a'
+ 0x62: pLl | pp, // 'b'
+ 0x63: pLl | pp, // 'c'
+ 0x64: pLl | pp, // 'd'
+ 0x65: pLl | pp, // 'e'
+ 0x66: pLl | pp, // 'f'
+ 0x67: pLl | pp, // 'g'
+ 0x68: pLl | pp, // 'h'
+ 0x69: pLl | pp, // 'i'
+ 0x6A: pLl | pp, // 'j'
+ 0x6B: pLl | pp, // 'k'
+ 0x6C: pLl | pp, // 'l'
+ 0x6D: pLl | pp, // 'm'
+ 0x6E: pLl | pp, // 'n'
+ 0x6F: pLl | pp, // 'o'
+ 0x70: pLl | pp, // 'p'
+ 0x71: pLl | pp, // 'q'
+ 0x72: pLl | pp, // 'r'
+ 0x73: pLl | pp, // 's'
+ 0x74: pLl | pp, // 't'
+ 0x75: pLl | pp, // 'u'
+ 0x76: pLl | pp, // 'v'
+ 0x77: pLl | pp, // 'w'
+ 0x78: pLl | pp, // 'x'
+ 0x79: pLl | pp, // 'y'
+ 0x7A: pLl | pp, // 'z'
+ 0x7B: pP | pp, // '{'
+ 0x7C: pS | pp, // '|'
+ 0x7D: pP | pp, // '}'
+ 0x7E: pS | pp, // '~'
+ 0x7F: pC, // '\x7f'
+ 0x80: pC, // '\u0080'
+ 0x81: pC, // '\u0081'
+ 0x82: pC, // '\u0082'
+ 0x83: pC, // '\u0083'
+ 0x84: pC, // '\u0084'
+ 0x85: pC, // '\u0085'
+ 0x86: pC, // '\u0086'
+ 0x87: pC, // '\u0087'
+ 0x88: pC, // '\u0088'
+ 0x89: pC, // '\u0089'
+ 0x8A: pC, // '\u008a'
+ 0x8B: pC, // '\u008b'
+ 0x8C: pC, // '\u008c'
+ 0x8D: pC, // '\u008d'
+ 0x8E: pC, // '\u008e'
+ 0x8F: pC, // '\u008f'
+ 0x90: pC, // '\u0090'
+ 0x91: pC, // '\u0091'
+ 0x92: pC, // '\u0092'
+ 0x93: pC, // '\u0093'
+ 0x94: pC, // '\u0094'
+ 0x95: pC, // '\u0095'
+ 0x96: pC, // '\u0096'
+ 0x97: pC, // '\u0097'
+ 0x98: pC, // '\u0098'
+ 0x99: pC, // '\u0099'
+ 0x9A: pC, // '\u009a'
+ 0x9B: pC, // '\u009b'
+ 0x9C: pC, // '\u009c'
+ 0x9D: pC, // '\u009d'
+ 0x9E: pC, // '\u009e'
+ 0x9F: pC, // '\u009f'
+ 0xA0: pZ, // '\u00a0'
+ 0xA1: pP | pp, // '\u00a1'
+ 0xA2: pS | pp, // '\u00a2'
+ 0xA3: pS | pp, // '\u00a3'
+ 0xA4: pS | pp, // '\u00a4'
+ 0xA5: pS | pp, // '\u00a5'
+ 0xA6: pS | pp, // '\u00a6'
+ 0xA7: pS | pp, // '\u00a7'
+ 0xA8: pS | pp, // '\u00a8'
+ 0xA9: pS | pp, // '\u00a9'
+ 0xAA: pLl | pp, // '\u00aa'
+ 0xAB: pP | pp, // '\u00ab'
+ 0xAC: pS | pp, // '\u00ac'
+ 0xAD: 0, // '\u00ad'
+ 0xAE: pS | pp, // '\u00ae'
+ 0xAF: pS | pp, // '\u00af'
+ 0xB0: pS | pp, // '\u00b0'
+ 0xB1: pS | pp, // '\u00b1'
+ 0xB2: pN | pp, // '\u00b2'
+ 0xB3: pN | pp, // '\u00b3'
+ 0xB4: pS | pp, // '\u00b4'
+ 0xB5: pLl | pp, // '\u00b5'
+ 0xB6: pS | pp, // '\u00b6'
+ 0xB7: pP | pp, // '\u00b7'
+ 0xB8: pS | pp, // '\u00b8'
+ 0xB9: pN | pp, // '\u00b9'
+ 0xBA: pLl | pp, // '\u00ba'
+ 0xBB: pP | pp, // '\u00bb'
+ 0xBC: pN | pp, // '\u00bc'
+ 0xBD: pN | pp, // '\u00bd'
+ 0xBE: pN | pp, // '\u00be'
+ 0xBF: pP | pp, // '\u00bf'
+ 0xC0: pLu | pp, // '\u00c0'
+ 0xC1: pLu | pp, // '\u00c1'
+ 0xC2: pLu | pp, // '\u00c2'
+ 0xC3: pLu | pp, // '\u00c3'
+ 0xC4: pLu | pp, // '\u00c4'
+ 0xC5: pLu | pp, // '\u00c5'
+ 0xC6: pLu | pp, // '\u00c6'
+ 0xC7: pLu | pp, // '\u00c7'
+ 0xC8: pLu | pp, // '\u00c8'
+ 0xC9: pLu | pp, // '\u00c9'
+ 0xCA: pLu | pp, // '\u00ca'
+ 0xCB: pLu | pp, // '\u00cb'
+ 0xCC: pLu | pp, // '\u00cc'
+ 0xCD: pLu | pp, // '\u00cd'
+ 0xCE: pLu | pp, // '\u00ce'
+ 0xCF: pLu | pp, // '\u00cf'
+ 0xD0: pLu | pp, // '\u00d0'
+ 0xD1: pLu | pp, // '\u00d1'
+ 0xD2: pLu | pp, // '\u00d2'
+ 0xD3: pLu | pp, // '\u00d3'
+ 0xD4: pLu | pp, // '\u00d4'
+ 0xD5: pLu | pp, // '\u00d5'
+ 0xD6: pLu | pp, // '\u00d6'
+ 0xD7: pS | pp, // '\u00d7'
+ 0xD8: pLu | pp, // '\u00d8'
+ 0xD9: pLu | pp, // '\u00d9'
+ 0xDA: pLu | pp, // '\u00da'
+ 0xDB: pLu | pp, // '\u00db'
+ 0xDC: pLu | pp, // '\u00dc'
+ 0xDD: pLu | pp, // '\u00dd'
+ 0xDE: pLu | pp, // '\u00de'
+ 0xDF: pLl | pp, // '\u00df'
+ 0xE0: pLl | pp, // '\u00e0'
+ 0xE1: pLl | pp, // '\u00e1'
+ 0xE2: pLl | pp, // '\u00e2'
+ 0xE3: pLl | pp, // '\u00e3'
+ 0xE4: pLl | pp, // '\u00e4'
+ 0xE5: pLl | pp, // '\u00e5'
+ 0xE6: pLl | pp, // '\u00e6'
+ 0xE7: pLl | pp, // '\u00e7'
+ 0xE8: pLl | pp, // '\u00e8'
+ 0xE9: pLl | pp, // '\u00e9'
+ 0xEA: pLl | pp, // '\u00ea'
+ 0xEB: pLl | pp, // '\u00eb'
+ 0xEC: pLl | pp, // '\u00ec'
+ 0xED: pLl | pp, // '\u00ed'
+ 0xEE: pLl | pp, // '\u00ee'
+ 0xEF: pLl | pp, // '\u00ef'
+ 0xF0: pLl | pp, // '\u00f0'
+ 0xF1: pLl | pp, // '\u00f1'
+ 0xF2: pLl | pp, // '\u00f2'
+ 0xF3: pLl | pp, // '\u00f3'
+ 0xF4: pLl | pp, // '\u00f4'
+ 0xF5: pLl | pp, // '\u00f5'
+ 0xF6: pLl | pp, // '\u00f6'
+ 0xF7: pS | pp, // '\u00f7'
+ 0xF8: pLl | pp, // '\u00f8'
+ 0xF9: pLl | pp, // '\u00f9'
+ 0xFA: pLl | pp, // '\u00fa'
+ 0xFB: pLl | pp, // '\u00fb'
+ 0xFC: pLl | pp, // '\u00fc'
+ 0xFD: pLl | pp, // '\u00fd'
+ 0xFE: pLl | pp, // '\u00fe'
+ 0xFF: pLl | pp, // '\u00ff'
+}
-// Range entries: 2715 16-bit, 545 32-bit, 3260 total.
-// Range bytes: 16290 16-bit, 6540 32-bit, 22830 total.
+// Range entries: 3190 16-bit, 657 32-bit, 3847 total.
+// Range bytes: 19140 16-bit, 7884 32-bit, 27024 total.