|  | // Copyright 2011 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package unicode | 
|  |  | 
// Property bits recorded for each code point under U+0100, so the
// predicates in this file can answer with a single table lookup.
// Each constant in this group occupies exactly one bit.
const (
	pC  = 1 << iota // control character
	pP              // punctuation character
	pN              // numeral
	pS              // symbolic character
	pZ              // spacing character
	pLu             // upper-case letter
	pLl             // lower-case letter
	pp              // printable character according to Go's definition
)

// Masks derived from the single-bit properties above.
const (
	pg     = pp | pZ   // graphical character according to the Unicode definition
	pLo    = pLl | pLu // both case bits set: a letter that is neither upper nor lower case
	pLmask = pLo       // covers every letter bit; a nonzero result under this mask means "letter"
)
|  |  | 
|  | // GraphicRanges defines the set of graphic characters according to Unicode. | 
|  | var GraphicRanges = []*RangeTable{ | 
|  | L, M, N, P, S, Zs, | 
|  | } | 
|  |  | 
|  | // PrintRanges defines the set of printable characters according to Go. | 
|  | // ASCII space, U+0020, is handled separately. | 
|  | var PrintRanges = []*RangeTable{ | 
|  | L, M, N, P, S, | 
|  | } | 
|  |  | 
|  | // IsGraphic reports whether the rune is defined as a Graphic by Unicode. | 
|  | // Such characters include letters, marks, numbers, punctuation, symbols, and | 
|  | // spaces, from categories L, M, N, P, S, Zs. | 
|  | func IsGraphic(r rune) bool { | 
|  | // We convert to uint32 to avoid the extra test for negative, | 
|  | // and in the index we convert to uint8 to avoid the range check. | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&pg != 0 | 
|  | } | 
|  | return In(r, GraphicRanges...) | 
|  | } | 
|  |  | 
|  | // IsPrint reports whether the rune is defined as printable by Go. Such | 
|  | // characters include letters, marks, numbers, punctuation, symbols, and the | 
|  | // ASCII space character, from categories L, M, N, P, S and the ASCII space | 
|  | // character. This categorization is the same as IsGraphic except that the | 
|  | // only spacing character is ASCII space, U+0020. | 
|  | func IsPrint(r rune) bool { | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&pp != 0 | 
|  | } | 
|  | return In(r, PrintRanges...) | 
|  | } | 
|  |  | 
|  | // IsOneOf reports whether the rune is a member of one of the ranges. | 
|  | // The function "In" provides a nicer signature and should be used in preference to IsOneOf. | 
|  | func IsOneOf(ranges []*RangeTable, r rune) bool { | 
|  | for _, inside := range ranges { | 
|  | if Is(inside, r) { | 
|  | return true | 
|  | } | 
|  | } | 
|  | return false | 
|  | } | 
|  |  | 
|  | // In reports whether the rune is a member of one of the ranges. | 
|  | func In(r rune, ranges ...*RangeTable) bool { | 
|  | for _, inside := range ranges { | 
|  | if Is(inside, r) { | 
|  | return true | 
|  | } | 
|  | } | 
|  | return false | 
|  | } | 
|  |  | 
|  | // IsControl reports whether the rune is a control character. | 
|  | // The C (Other) Unicode category includes more code points | 
|  | // such as surrogates; use Is(C, r) to test for them. | 
|  | func IsControl(r rune) bool { | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&pC != 0 | 
|  | } | 
|  | // All control characters are < MaxLatin1. | 
|  | return false | 
|  | } | 
|  |  | 
|  | // IsLetter reports whether the rune is a letter (category L). | 
|  | func IsLetter(r rune) bool { | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&(pLmask) != 0 | 
|  | } | 
|  | return isExcludingLatin(Letter, r) | 
|  | } | 
|  |  | 
|  | // IsMark reports whether the rune is a mark character (category M). | 
|  | func IsMark(r rune) bool { | 
|  | // There are no mark characters in Latin-1. | 
|  | return isExcludingLatin(Mark, r) | 
|  | } | 
|  |  | 
|  | // IsNumber reports whether the rune is a number (category N). | 
|  | func IsNumber(r rune) bool { | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&pN != 0 | 
|  | } | 
|  | return isExcludingLatin(Number, r) | 
|  | } | 
|  |  | 
|  | // IsPunct reports whether the rune is a Unicode punctuation character | 
|  | // (category P). | 
|  | func IsPunct(r rune) bool { | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&pP != 0 | 
|  | } | 
|  | return Is(Punct, r) | 
|  | } | 
|  |  | 
|  | // IsSpace reports whether the rune is a space character as defined | 
|  | // by Unicode's White Space property; in the Latin-1 space | 
|  | // this is | 
|  | // | 
|  | //	'\t', '\n', '\v', '\f', '\r', ' ', U+0085 (NEL), U+00A0 (NBSP). | 
|  | // | 
|  | // Other definitions of spacing characters are set by category | 
|  | // Z and property Pattern_White_Space. | 
|  | func IsSpace(r rune) bool { | 
|  | // This property isn't the same as Z; special-case it. | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | switch r { | 
|  | case '\t', '\n', '\v', '\f', '\r', ' ', 0x85, 0xA0: | 
|  | return true | 
|  | } | 
|  | return false | 
|  | } | 
|  | return isExcludingLatin(White_Space, r) | 
|  | } | 
|  |  | 
|  | // IsSymbol reports whether the rune is a symbolic character. | 
|  | func IsSymbol(r rune) bool { | 
|  | if uint32(r) <= MaxLatin1 { | 
|  | return properties[uint8(r)]&pS != 0 | 
|  | } | 
|  | return isExcludingLatin(Symbol, r) | 
|  | } |