internal/ucd: added Parse function
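Parse implements the common pattern of opening a UCD file, calling a
function for each entry, and calling log.Fatal on error. The per-package
parse helpers that duplicated this pattern are replaced by calls to it.
Dropping the KeepRanges option in package cases is possible because
non-rune fields are handled even if KeepRanges is not set.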
Change-Id: Ic32f8ce73a90a864d57df5a1c7c8a7d73d06b9e5
Reviewed-on: https://go-review.googlesource.com/17350
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
diff --git a/bidi/gen.go b/bidi/gen.go
index 8bdcb17..74bf76b 100644
--- a/bidi/gen.go
+++ b/bidi/gen.go
@@ -72,7 +72,7 @@
xorMap := map[rune]int{}
xorMasks := []rune{0} // First value is no-op.
- parse("BidiBrackets.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
r1 := p.Rune(0)
r2 := p.Rune(1)
xor := r1 ^ r2
@@ -106,7 +106,7 @@
}
// Insert the derived BiDi properties.
- parse("extracted/DerivedBidiClass.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
class, ok := bidiClass[p.String(1)]
if !ok {
diff --git a/bidi/gen_ranges.go b/bidi/gen_ranges.go
index a0e6a04..9f4c335 100644
--- a/bidi/gen_ranges.go
+++ b/bidi/gen_ranges.go
@@ -7,7 +7,6 @@
package main
import (
- "log"
"unicode"
"golang.org/x/text/internal/gen"
@@ -41,7 +40,7 @@
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
fn(r, _BN) // Boundary Neutral
})
- parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
if p.String(1) == "Default_Ignorable_Code_Point" {
fn(p.Rune(0), _BN) // Boundary Neutral
}
@@ -56,17 +55,3 @@
}
}
}
-
-// parse calls f for each entry in the given UCD file.
-func parse(filename string, f func(p *ucd.Parser)) {
- r := gen.OpenUCDFile(filename)
- defer r.Close()
-
- p := ucd.New(r)
- for p.Next() {
- f(p)
- }
- if err := p.Err(); err != nil {
- log.Fatal(err)
- }
-}
diff --git a/bidi/ranges_test.go b/bidi/ranges_test.go
index a4ba376..dadbe95 100644
--- a/bidi/ranges_test.go
+++ b/bidi/ranges_test.go
@@ -3,7 +3,6 @@
package bidi
import (
- "log"
"unicode"
"golang.org/x/text/internal/gen"
@@ -37,7 +36,7 @@
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
fn(r, _BN) // Boundary Neutral
})
- parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
if p.String(1) == "Default_Ignorable_Code_Point" {
fn(p.Rune(0), _BN) // Boundary Neutral
}
@@ -52,17 +51,3 @@
}
}
}
-
-// parse calls f for each entry in the given UCD file.
-func parse(filename string, f func(p *ucd.Parser)) {
- r := gen.OpenUCDFile(filename)
- defer r.Close()
-
- p := ucd.New(r)
- for p.Next() {
- f(p)
- }
- if err := p.Err(); err != nil {
- log.Fatal(err)
- }
-}
diff --git a/bidi/tables_test.go b/bidi/tables_test.go
index 10d17ba..b2ad39b 100644
--- a/bidi/tables_test.go
+++ b/bidi/tables_test.go
@@ -51,7 +51,7 @@
trie := newBidiTrie(0)
- parse("BidiBrackets.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
r1 := p.Rune(0)
want := p.Rune(1)
@@ -71,7 +71,7 @@
}
// Insert the derived BiDi properties.
- parse("extracted/DerivedBidiClass.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
test("derived", r, p.String(1))
})
diff --git a/cases/gen.go b/cases/gen.go
index fcc2633..1b6fdf5 100644
--- a/cases/gen.go
+++ b/cases/gen.go
@@ -90,22 +90,8 @@
return string(r.Rune)
}
-// ucdParser is a parser for UCD files.
-type ucdParser []ucd.Option
-
-func parser(opts ...ucd.Option) ucdParser { return ucdParser(opts) }
-
-// parse calls f for each entry in the given UCD file.
-func (opts ucdParser) parse(filename string, f func(p *ucd.Parser)) {
- r := gen.OpenUCDFile(filename)
- defer r.Close()
- p := ucd.New(r, opts...)
- for p.Next() {
- f(p)
- }
- if err := p.Err(); err != nil {
- log.Fatal(err)
- }
+func parse(file string, f func(p *ucd.Parser)) {
+ ucd.Parse(gen.OpenUCDFile(file), f)
}
func parseUCD() []runeInfo {
@@ -117,7 +103,7 @@
return c
}
- parser().parse("UnicodeData.txt", func(p *ucd.Parser) {
+ parse("UnicodeData.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
ri.CCC = byte(p.Int(ucd.CanonicalCombiningClass))
ri.Simple[cLower] = p.Runes(ucd.SimpleLowercaseMapping)
@@ -129,14 +115,14 @@
})
// <code>; <property>
- parser().parse("PropList.txt", func(p *ucd.Parser) {
+ parse("PropList.txt", func(p *ucd.Parser) {
if p.String(1) == "Soft_Dotted" {
chars[p.Rune(0)].SoftDotted = true
}
})
// <code>; <word break type>
- parser().parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
+ parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
switch p.String(1) {
case "Case_Ignorable":
@@ -151,7 +137,7 @@
})
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
- parser().parse("SpecialCasing.txt", func(p *ucd.Parser) {
+ parse("SpecialCasing.txt", func(p *ucd.Parser) {
// We drop all conditional special casing and deal with them manually in
// the language-specific case mappers. Rune 0x03A3 is the only one with
// a conditional formatting that is not language-specific. However,
@@ -170,7 +156,7 @@
// TODO: Use text breaking according to UAX #29.
// <code>; <word break type>
- parser().parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
+ parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
ri := get(p.Rune(0))
ri.BreakType = p.String(1)
@@ -185,7 +171,7 @@
// TODO: Support case folding.
// // <code>; <status>; <mapping>;
- // parser().parse("CaseFolding.txt", func (p *ucd.Parser) {
+ // parse("CaseFolding.txt", func (p *ucd.Parser) {
// ri := get(p.Rune(0))
// switch p.String(1) {
// case "C":
@@ -647,7 +633,7 @@
// <code>; <lower> ; <title> ; <upper> ; (<condition_list> ;)?
fmt.Fprintln(w, "\tspecial = map[rune]struct{ toLower, toTitle, toUpper string }{")
- parser().parse("SpecialCasing.txt", func(p *ucd.Parser) {
+ parse("SpecialCasing.txt", func(p *ucd.Parser) {
// Skip conditional entries.
if p.String(4) != "" {
return
@@ -660,7 +646,7 @@
// Break property
notBreak := map[rune]bool{}
- parser().parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
+ parse("auxiliary/WordBreakProperty.txt", func(p *ucd.Parser) {
switch p.String(1) {
case "Extend", "Format", "MidLetter", "MidNumLet", "Single_Quote",
"ALetter", "Hebrew_Letter", "Numeric", "ExtendNumLet":
@@ -688,14 +674,14 @@
// Word break test
// Filter out all samples that do not contain cased characters.
cased := map[rune]bool{}
- parser().parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
+ parse("DerivedCoreProperties.txt", func(p *ucd.Parser) {
if p.String(1) == "Cased" {
cased[p.Rune(0)] = true
}
})
fmt.Fprintln(w, "\tbreakTest = []string{")
- parser(ucd.KeepRanges).parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
+ parse("auxiliary/WordBreakTest.txt", func(p *ucd.Parser) {
c := strings.Split(p.String(0), " ")
const sep = '|'
@@ -771,7 +757,7 @@
varNameParts := strings.Split(property, "_")
varNameParts[0] = strings.ToLower(varNameParts[0])
fmt.Fprintf(w, "\t%s = map[rune]bool{\n", strings.Join(varNameParts, ""))
- parser().parse(file, func(p *ucd.Parser) {
+ parse(file, func(p *ucd.Parser) {
if p.String(1) == property {
r := p.Rune(0)
verify[r] = true
diff --git a/internal/ucd/ucd.go b/internal/ucd/ucd.go
index da2ffa5..2b0d1a1 100644
--- a/internal/ucd/ucd.go
+++ b/internal/ucd/ucd.go
@@ -15,6 +15,7 @@
"errors"
"fmt"
"io"
+ "log"
"regexp"
"strconv"
"strings"
@@ -39,6 +40,23 @@
SimpleTitlecaseMapping
)
+// Parse calls f for each entry in the given reader of a UCD file. It closes
+// the reader upon return and calls log.Fatal if the Parser reports an error.
+//
+// This implements the most common usage pattern of Parser.
+func Parse(r io.ReadCloser, f func(p *Parser)) {
+ defer r.Close()
+
+ p := New(r)
+ for p.Next() {
+ f(p)
+ }
+ if err := p.Err(); err != nil {
+ r.Close() // os.Exit will cause defers not to be called.
+ log.Fatal(err)
+ }
+}
+
// An Option is used to configure a Parser.
type Option func(p *Parser)
diff --git a/unicode/rangetable/gen.go b/unicode/rangetable/gen.go
index a6ea172..bea49dd 100644
--- a/unicode/rangetable/gen.go
+++ b/unicode/rangetable/gen.go
@@ -74,7 +74,8 @@
for _, v := range versions {
assigned := []rune{}
- parse(v, func(p *ucd.Parser) {
+ r := gen.Open("http://www.unicode.org/Public/", "", v+"/ucd/UnicodeData.txt")
+ ucd.Parse(r, func(p *ucd.Parser) {
assigned = append(assigned, p.Rune(0))
})
@@ -95,20 +96,6 @@
gen.WriteGoFile("tables.go", "rangetable", w.Bytes())
}
-// parse calls f for each entry in the given UCD file.
-func parse(version string, f func(p *ucd.Parser)) {
- r := gen.Open("http://www.unicode.org/Public/", "", version+"/ucd/UnicodeData.txt")
- defer r.Close()
-
- p := ucd.New(r)
- for p.Next() {
- f(p)
- }
- if err := p.Err(); err != nil {
- log.Fatal(err)
- }
-}
-
func print(w io.Writer, rt *unicode.RangeTable) {
fmt.Fprintln(w, "&unicode.RangeTable{")
fmt.Fprintln(w, "\tR16: []unicode.Range16{")
diff --git a/width/common_test.go b/width/common_test.go
index 0959b66..fb5545e 100644
--- a/width/common_test.go
+++ b/width/common_test.go
@@ -56,7 +56,7 @@
// We cannot reuse package norm's decomposition, as we need an unexpanded
// decomposition. We make use of the opportunity to verify that the
// decomposition type is as expected.
- parse("UnicodeData.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
if !maps[s[0]] {
@@ -74,7 +74,7 @@
})
// <rune range>;<type>
- parse("EastAsianWidth.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
tag, ok := typeMap[p.String(1)]
if !ok {
log.Fatalf("Unknown width type %q", p.String(1))
@@ -90,17 +90,3 @@
f(r, tag, alt)
})
}
-
-// parse calls f for each entry in the given UCD file.
-func parse(filename string, f func(p *ucd.Parser)) {
- r := gen.OpenUCDFile(filename)
- defer r.Close()
-
- p := ucd.New(r)
- for p.Next() {
- f(p)
- }
- if err := p.Err(); err != nil {
- log.Fatal(err)
- }
-}
diff --git a/width/gen_common.go b/width/gen_common.go
index 813792c..601e752 100644
--- a/width/gen_common.go
+++ b/width/gen_common.go
@@ -60,7 +60,7 @@
// We cannot reuse package norm's decomposition, as we need an unexpanded
// decomposition. We make use of the opportunity to verify that the
// decomposition type is as expected.
- parse("UnicodeData.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
s := strings.SplitN(p.String(ucd.DecompMapping), " ", 2)
if !maps[s[0]] {
@@ -78,7 +78,7 @@
})
// <rune range>;<type>
- parse("EastAsianWidth.txt", func(p *ucd.Parser) {
+ ucd.Parse(gen.OpenUCDFile("EastAsianWidth.txt"), func(p *ucd.Parser) {
tag, ok := typeMap[p.String(1)]
if !ok {
log.Fatalf("Unknown width type %q", p.String(1))
@@ -94,17 +94,3 @@
f(r, tag, alt)
})
}
-
-// parse calls f for each entry in the given UCD file.
-func parse(filename string, f func(p *ucd.Parser)) {
- r := gen.OpenUCDFile(filename)
- defer r.Close()
-
- p := ucd.New(r)
- for p.Next() {
- f(p)
- }
- if err := p.Err(); err != nil {
- log.Fatal(err)
- }
-}