| // Copyright 2014 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package cases |
| |
| import ( |
| "strings" |
| "testing" |
| "unicode" |
| |
| "golang.org/x/text/internal/testtext" |
| "golang.org/x/text/language" |
| "golang.org/x/text/transform" |
| "golang.org/x/text/unicode/norm" |
| "golang.org/x/text/unicode/rangetable" |
| ) |
| |
| // The following definitions are taken directly from Chapter 3 of The Unicode |
| // Standard. |
| |
| func propCased(r rune) bool { |
| return propLower(r) || propUpper(r) || unicode.IsTitle(r) |
| } |
| |
| func propLower(r rune) bool { |
| return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r) |
| } |
| |
| func propUpper(r rune) bool { |
| return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r) |
| } |
| |
| func propIgnore(r rune) bool { |
| if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) { |
| return true |
| } |
| return caseIgnorable[r] |
| } |
| |
| func hasBreakProp(r rune) bool { |
| // binary search over ranges |
| lo := 0 |
| hi := len(breakProp) |
| for lo < hi { |
| m := lo + (hi-lo)/2 |
| bp := &breakProp[m] |
| if bp.lo <= r && r <= bp.hi { |
| return true |
| } |
| if r < bp.lo { |
| hi = m |
| } else { |
| lo = m + 1 |
| } |
| } |
| return false |
| } |
| |
| func contextFromRune(r rune) *context { |
| c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true} |
| c.next() |
| return &c |
| } |
| |
| func TestCaseProperties(t *testing.T) { |
| if unicode.Version != UnicodeVersion { |
| // Properties of existing code points may change by Unicode version, so |
| // we need to skip. |
| t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion) |
| } |
| assigned := rangetable.Assigned(UnicodeVersion) |
| coreVersion := rangetable.Assigned(unicode.Version) |
| for r := rune(0); r <= lastRuneForTesting; r++ { |
| if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { |
| continue |
| } |
| c := contextFromRune(r) |
| if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want { |
| t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info) |
| } |
| // New letters may change case types, but existing case pairings should |
| // not change. See Case Pair Stability in |
| // https://unicode.org/policies/stability_policy.html. |
| if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) { |
| if got, want := c.info.isCased(), propCased(r); got != want { |
| t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info) |
| } |
| if got, want := c.caseType() == cUpper, propUpper(r); got != want { |
| t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info) |
| } |
| if got, want := c.caseType() == cLower, propLower(r); got != want { |
| t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info) |
| } |
| } |
| if got, want := c.info.isBreak(), hasBreakProp(r); got != want { |
| t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info) |
| } |
| } |
| // TODO: get title case from unicode file. |
| } |
| |
| func TestMapping(t *testing.T) { |
| assigned := rangetable.Assigned(UnicodeVersion) |
| coreVersion := rangetable.Assigned(unicode.Version) |
| if coreVersion == nil { |
| coreVersion = assigned |
| } |
| apply := func(r rune, f func(c *context) bool) string { |
| c := contextFromRune(r) |
| f(c) |
| return string(c.dst[:c.pDst]) |
| } |
| |
| for r, tt := range special { |
| if got, want := apply(r, lower), tt.toLower; got != want { |
| t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want) |
| } |
| if got, want := apply(r, title), tt.toTitle; got != want { |
| t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want) |
| } |
| if got, want := apply(r, upper), tt.toUpper; got != want { |
| t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want) |
| } |
| } |
| |
| for r := rune(0); r <= lastRuneForTesting; r++ { |
| if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { |
| continue |
| } |
| if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) { |
| continue |
| } |
| if _, ok := special[r]; ok { |
| continue |
| } |
| want := string(unicode.ToLower(r)) |
| if got := apply(r, lower); got != want { |
| t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) |
| } |
| |
| want = string(unicode.ToUpper(r)) |
| if got := apply(r, upper); got != want { |
| t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) |
| } |
| |
| want = string(unicode.ToTitle(r)) |
| if got := apply(r, title); got != want { |
| t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want)) |
| } |
| } |
| } |
| |
| func runeFoldData(r rune) (x struct{ simple, full, special string }) { |
| x = foldMap[r] |
| if x.simple == "" { |
| x.simple = string(unicode.ToLower(r)) |
| } |
| if x.full == "" { |
| x.full = string(unicode.ToLower(r)) |
| } |
| if x.special == "" { |
| x.special = x.full |
| } |
| return |
| } |
| |
| func TestFoldData(t *testing.T) { |
| assigned := rangetable.Assigned(UnicodeVersion) |
| coreVersion := rangetable.Assigned(unicode.Version) |
| if coreVersion == nil { |
| coreVersion = assigned |
| } |
| apply := func(r rune, f func(c *context) bool) (string, info) { |
| c := contextFromRune(r) |
| f(c) |
| return string(c.dst[:c.pDst]), c.info.cccType() |
| } |
| for r := rune(0); r <= lastRuneForTesting; r++ { |
| if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) { |
| continue |
| } |
| x := runeFoldData(r) |
| if got, info := apply(r, foldFull); got != x.full { |
| t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info) |
| } |
| // TODO: special and simple. |
| } |
| } |
| |
| func TestCCC(t *testing.T) { |
| assigned := rangetable.Assigned(UnicodeVersion) |
| normVersion := rangetable.Assigned(norm.Version) |
| for r := rune(0); r <= lastRuneForTesting; r++ { |
| if !unicode.In(r, assigned) || !unicode.In(r, normVersion) { |
| continue |
| } |
| c := contextFromRune(r) |
| |
| p := norm.NFC.PropertiesString(string(r)) |
| want := cccOther |
| switch p.CCC() { |
| case 0: |
| want = cccZero |
| case above: |
| want = cccAbove |
| } |
| if got := c.info.cccType(); got != want { |
| t.Errorf("%U: got %x; want %x", r, got, want) |
| } |
| } |
| } |
| |
| func TestWordBreaks(t *testing.T) { |
| for _, tt := range breakTest { |
| desc := norm.NFC.String(tt) |
| testtext.Run(t, desc, func(t *testing.T) { |
| parts := strings.Split(tt, "|") |
| want := "" |
| for _, s := range parts { |
| found := false |
| // This algorithm implements title casing given word breaks |
| // as defined in the Unicode standard 3.13 R3. |
| for _, r := range s { |
| title := unicode.ToTitle(r) |
| lower := unicode.ToLower(r) |
| if !found && title != lower { |
| found = true |
| want += string(title) |
| } else { |
| want += string(lower) |
| } |
| } |
| } |
| src := strings.Join(parts, "") |
| got := Title(language.Und).String(src) |
| if got != want { |
| t.Errorf("got %q; want %q", got, want) |
| } |
| }) |
| } |
| } |
| |
| func TestContext(t *testing.T) { |
| tests := []struct { |
| desc string |
| dstSize int |
| atEOF bool |
| src string |
| out string |
| nSrc int |
| err error |
| ops string |
| prefixArg string |
| prefixWant bool |
| }{{ |
| desc: "next: past end, atEOF, no checkpoint", |
| dstSize: 10, |
| atEOF: true, |
| src: "12", |
| out: "", |
| nSrc: 2, |
| ops: "next;next;next", |
| // Test that calling prefix with a non-empty argument when the buffer |
| // is depleted returns false. |
| prefixArg: "x", |
| prefixWant: false, |
| }, { |
| desc: "next: not at end, atEOF, no checkpoint", |
| dstSize: 10, |
| atEOF: false, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| err: transform.ErrShortSrc, |
| ops: "next;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "next: past end, !atEOF, no checkpoint", |
| dstSize: 10, |
| atEOF: false, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| err: transform.ErrShortSrc, |
| ops: "next;next;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "next: past end, !atEOF, checkpoint", |
| dstSize: 10, |
| atEOF: false, |
| src: "12", |
| out: "", |
| nSrc: 2, |
| ops: "next;next;checkpoint;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "copy: exact count, atEOF, no checkpoint", |
| dstSize: 2, |
| atEOF: true, |
| src: "12", |
| out: "12", |
| nSrc: 2, |
| ops: "next;copy;next;copy;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "copy: past end, !atEOF, no checkpoint", |
| dstSize: 2, |
| atEOF: false, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| err: transform.ErrShortSrc, |
| ops: "next;copy;next;copy;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "copy: past end, !atEOF, checkpoint", |
| dstSize: 2, |
| atEOF: false, |
| src: "12", |
| out: "12", |
| nSrc: 2, |
| ops: "next;copy;next;copy;checkpoint;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "copy: short dst", |
| dstSize: 1, |
| atEOF: false, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| err: transform.ErrShortDst, |
| ops: "next;copy;next;copy;checkpoint;next", |
| prefixArg: "12", |
| prefixWant: false, |
| }, { |
| desc: "copy: short dst, checkpointed", |
| dstSize: 1, |
| atEOF: false, |
| src: "12", |
| out: "1", |
| nSrc: 1, |
| err: transform.ErrShortDst, |
| ops: "next;copy;checkpoint;next;copy;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "writeString: simple", |
| dstSize: 3, |
| atEOF: true, |
| src: "1", |
| out: "1ab", |
| nSrc: 1, |
| ops: "next;copy;writeab;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "writeString: short dst", |
| dstSize: 2, |
| atEOF: true, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| err: transform.ErrShortDst, |
| ops: "next;copy;writeab;next", |
| prefixArg: "2", |
| prefixWant: true, |
| }, { |
| desc: "writeString: simple", |
| dstSize: 3, |
| atEOF: true, |
| src: "12", |
| out: "1ab", |
| nSrc: 2, |
| ops: "next;copy;next;writeab;next", |
| prefixArg: "", |
| prefixWant: true, |
| }, { |
| desc: "writeString: short dst", |
| dstSize: 2, |
| atEOF: true, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| err: transform.ErrShortDst, |
| ops: "next;copy;next;writeab;next", |
| prefixArg: "1", |
| prefixWant: false, |
| }, { |
| desc: "prefix", |
| dstSize: 2, |
| atEOF: true, |
| src: "12", |
| out: "", |
| nSrc: 0, |
| // Context will assign an ErrShortSrc if the input wasn't exhausted. |
| err: transform.ErrShortSrc, |
| prefixArg: "12", |
| prefixWant: true, |
| }} |
| for _, tt := range tests { |
| c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF} |
| |
| for _, op := range strings.Split(tt.ops, ";") { |
| switch op { |
| case "next": |
| c.next() |
| case "checkpoint": |
| c.checkpoint() |
| case "writeab": |
| c.writeString("ab") |
| case "copy": |
| c.copy() |
| case "": |
| default: |
| t.Fatalf("unknown op %q", op) |
| } |
| } |
| if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant { |
| t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant) |
| } |
| nDst, nSrc, err := c.ret() |
| if err != tt.err { |
| t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err) |
| } |
| if out := string(c.dst[:nDst]); out != tt.out { |
| t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out) |
| } |
| if nSrc != tt.nSrc { |
| t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc) |
| } |
| } |
| } |