| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package unicode |
| |
// Bit flags describing each code point below U+0100, used for fast lookup.
const (
	pC  = 1 << 0 // control character
	pP  = 1 << 1 // punctuation character
	pN  = 1 << 2 // numeral
	pS  = 1 << 3 // symbolic character
	pZ  = 1 << 4 // spacing character
	pLu = 1 << 5 // upper-case letter
	pLl = 1 << 6 // lower-case letter
	pp  = 1 << 7 // printable character according to Go's definition

	pg     = pZ | pp   // graphical character according to the Unicode definition
	pLo    = pLu | pLl // letter that is neither upper nor lower case
	pLmask = pLo       // both letter-case bits; IsLetter tests against this mask
)
| |
| // GraphicRanges defines the set of graphic characters according to Unicode. |
| var GraphicRanges = []*RangeTable{ |
| L, M, N, P, S, Zs, |
| } |
| |
| // PrintRanges defines the set of printable characters according to Go. |
| // ASCII space, U+0020, is handled separately. |
| var PrintRanges = []*RangeTable{ |
| L, M, N, P, S, |
| } |
| |
| // IsGraphic reports whether the rune is defined as a Graphic by Unicode. |
| // Such characters include letters, marks, numbers, punctuation, symbols, and |
| // spaces, from categories [L], [M], [N], [P], [S], [Zs]. |
| func IsGraphic(r rune) bool { |
| // We convert to uint32 to avoid the extra test for negative, |
| // and in the index we convert to uint8 to avoid the range check. |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&pg != 0 |
| } |
| return In(r, GraphicRanges...) |
| } |
| |
| // IsPrint reports whether the rune is defined as printable by Go. Such |
| // characters include letters, marks, numbers, punctuation, symbols, and the |
| // ASCII space character, from categories [L], [M], [N], [P], [S] and the ASCII space |
| // character. This categorization is the same as [IsGraphic] except that the |
| // only spacing character is ASCII space, U+0020. |
| func IsPrint(r rune) bool { |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&pp != 0 |
| } |
| return In(r, PrintRanges...) |
| } |
| |
| // IsOneOf reports whether the rune is a member of one of the ranges. |
| // The function "In" provides a nicer signature and should be used in preference to IsOneOf. |
| func IsOneOf(ranges []*RangeTable, r rune) bool { |
| for _, inside := range ranges { |
| if Is(inside, r) { |
| return true |
| } |
| } |
| return false |
| } |
| |
| // In reports whether the rune is a member of one of the ranges. |
| func In(r rune, ranges ...*RangeTable) bool { |
| for _, inside := range ranges { |
| if Is(inside, r) { |
| return true |
| } |
| } |
| return false |
| } |
| |
| // IsControl reports whether the rune is a control character. |
| // The [C] ([Other]) Unicode category includes more code points |
| // such as surrogates; use [Is](C, r) to test for them. |
| func IsControl(r rune) bool { |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&pC != 0 |
| } |
| // All control characters are < MaxLatin1. |
| return false |
| } |
| |
| // IsLetter reports whether the rune is a letter (category [L]). |
| func IsLetter(r rune) bool { |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&(pLmask) != 0 |
| } |
| return isExcludingLatin(Letter, r) |
| } |
| |
| // IsMark reports whether the rune is a mark character (category [M]). |
| func IsMark(r rune) bool { |
| // There are no mark characters in Latin-1. |
| return isExcludingLatin(Mark, r) |
| } |
| |
| // IsNumber reports whether the rune is a number (category [N]). |
| func IsNumber(r rune) bool { |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&pN != 0 |
| } |
| return isExcludingLatin(Number, r) |
| } |
| |
| // IsPunct reports whether the rune is a Unicode punctuation character |
| // (category [P]). |
| func IsPunct(r rune) bool { |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&pP != 0 |
| } |
| return Is(Punct, r) |
| } |
| |
| // IsSpace reports whether the rune is a space character as defined |
| // by Unicode's White Space property; in the Latin-1 space |
| // this is |
| // |
| // '\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). |
| // |
| // Other definitions of spacing characters are set by category |
| // Z and property [Pattern_White_Space]. |
| func IsSpace(r rune) bool { |
| // This property isn't the same as Z; special-case it. |
| if uint32(r) <= MaxLatin1 { |
| switch r { |
| case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: |
| return true |
| } |
| return false |
| } |
| return isExcludingLatin(White_Space, r) |
| } |
| |
| // IsSymbol reports whether the rune is a symbolic character. |
| func IsSymbol(r rune) bool { |
| if uint32(r) <= MaxLatin1 { |
| return properties[uint8(r)]&pS != 0 |
| } |
| return isExcludingLatin(Symbol, r) |
| } |