blob: 0a4e61f7b5a17ca22fdc48fb8e24da60bdcfeb7a [file] [log] [blame]
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The locale package provides a type to represent BCP 47 locale identifiers.
// It supports various canonicalizations defined in CLDR.
package locale
import "strings"
var (
// Und represents the undefined langauge. It is also the root locale.
Und = und
En = en // Default Locale for English.
En_US = en_US // Default locale for American English.
De = de // Default locale for German.
// TODO: list of most common language identifiers.
)
var (
Supported Set // All supported locales.
Common Set // A selection of common locales.
)
var (
de = ID{lang: getLangID([]byte("de")), region: unknownRegion, script: unknownScript}
en = ID{lang: getLangID([]byte("en")), region: unknownRegion, script: unknownScript}
en_US = en
und = ID{lang: unknownLang, region: unknownRegion, script: unknownScript}
)
// ID represents a BCP 47 locale identifier. It can be used to
// select an instance for a specific locale. All Locale values are guaranteed
// to be well-formed.
type ID struct {
// In most cases, just lang, region and script will be needed. In such cases
// str may be nil.
lang langID
region regionID
script scriptID
pVariant byte // offset in str
pExt uint16 // offset of first extension
str *string
}
// Make calls Parse and Canonicalize and returns the resulting ID.
// Any errors are ignored and a sensible default is returned.
// In most cases, locale IDs should be created using this method.
func Make(id string) ID {
loc, _ := Parse(id)
return loc.Canonicalize(All)
}
// IsRoot returns true if loc is equal to locale "und".
func (loc ID) IsRoot() bool {
if loc.str != nil {
n := len(*loc.str)
if n > 0 && loc.pExt > 0 && int(loc.pExt) < n {
return false
}
if uint16(loc.pVariant) != loc.pExt || strings.HasPrefix(*loc.str, "x-") {
return false
}
loc.str = nil
}
return loc == und
}
// CanonType is can be used to enable or disable various types of canonicalization.
type CanonType int
const (
// Replace deprecated values with their preferred ones.
Deprecated CanonType = 1 << iota
// Remove redundant scripts.
SuppressScript
// Map the dominant language of macro language group to the macro language identifier.
// For example cmn -> zh.
Macro
// All canonicalizations prescribed by BCP 47.
BCP47 = Deprecated | SuppressScript
All = BCP47 | Macro
// TODO: LikelyScript, LikelyRegion: supress similar to ICU.
)
// Canonicalize replaces the identifier with its canonical equivalent.
func (loc ID) Canonicalize(t CanonType) ID {
changed := false
if t&SuppressScript != 0 {
if loc.lang < langNoIndexOffset && uint8(loc.script) == suppressScript[loc.lang] {
loc.script = unknownScript
changed = true
}
}
if t&Deprecated != 0 {
l := normLang(langOldMap[:], loc.lang)
if l != loc.lang {
changed = true
}
loc.lang = l
}
if t&Macro != 0 {
l := normLang(langMacroMap[:], loc.lang)
if l != loc.lang {
changed = true
}
loc.lang = l
}
if changed && loc.str != nil {
ext := ""
if loc.pExt > 0 {
ext = (*loc.str)[loc.pExt+1:]
}
s := loc.makeString(loc.Part(VariantPart), ext)
loc.str = &s
}
return loc
}
// Parent returns the direct parent for this locale, which is the locale
// from which this locale inherits any undefined values.
func (loc ID) Parent() ID {
// TODO: implement
return und
}
// Written strips qualifiers from the identifier until the resulting identfier
// inherits from root.
func (loc ID) Written() ID {
// TODO: implement
return und
}
// Confidence indicates the level of certainty for a given return value.
// For example, Serbian may be written in cyrillic or latin script.
// The confidence level indicates whether a value was explicitly specified,
// whether it is typically the only possible value, or whether there is
// an ambiguity.
type Confidence int
const (
Not Confidence = iota // full confidence that there was no match
Low // most likely value picked out of a set of alternatives
High // value inferred from a parent and is generally assumed to be the correct match
Exact // exact match or explicitly specified value
)
func (loc *ID) makeString(vars, ext string) string {
buf := [128]byte{}
n := loc.lang.stringToBuf(buf[:])
if loc.script != unknownScript {
n += copy(buf[n:], "-")
n += copy(buf[n:], loc.script.String())
}
if loc.region != unknownRegion {
n += copy(buf[n:], "-")
n += copy(buf[n:], loc.region.String())
}
b := buf[:n]
if vars != "" {
b = append(b, '-')
loc.pVariant = byte(len(b))
b = append(b, vars...)
loc.pExt = uint16(len(b))
}
if ext != "" {
loc.pExt = uint16(len(b))
b = append(b, '-')
b = append(b, ext...)
}
return string(b)
}
// String returns the canonical string representation of the locale.
func (loc ID) String() string {
if loc.str == nil {
return loc.makeString("", "")
}
return *loc.str
}
// Language returns the language for the locale.
func (loc ID) Language() Language {
// TODO: implement
return Language{0}
}
// Script infers the script for the locale. If it was not explictly given, it will infer
// a most likely candidate from the parent locales.
// If more than one script is commonly used for a language, the most likely one
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
// for Serbian.
// Note that an inferred script is never guaranteed to be the correct one. Latn is
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
// in the past. Also, the script that is commonly used may change over time.
func (loc ID) Script() (Script, Confidence) {
// TODO: implement
return Script{0}, Exact
}
// Region returns the region for l. If it was not explicitly given, it will
// infer a most likely candidate from the parent locales.
func (loc ID) Region() (Region, Confidence) {
// TODO: implement
return Region{0}, Exact
}
// Variant returns the variant specified explicitly for this locale
// or nil if no variant was specified.
func (loc ID) Variant() Variant {
return Variant{""}
}
// Scope returns a Set that indicates the common variants for which the
// locale may be applicable.
// Locales will returns all valid sublocales. Languages will return the language
// for this locale. Regions will return all regions for which a locale with
// this language is defined. And Scripts will return all scripts that are
// commonly used for this locale.
// If any of these properties is explicitly specified, the respective lists
// will be constraint. For example, for sr_Latn Scripts will return [Latn]
// instead of [Cyrl Latn].
func (loc ID) Scope() Set {
// TODO: implement
return nil
}
// TypeForKey returns the type associated with the given key, where key
// is one of the allowed values defined for the Unicode locale extension ('u') in
// http://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
// TypeForKey will traverse the inheritance chain to get the correct value.
func (loc ID) TypeForKey(key string) string {
// TODO: implement
return ""
}
// KeyValueString returns a string to be set with KeyValuePart.
// Error handling is done by Compose.
func KeyValueString(m map[string]string) (string, error) {
// TODO: implement
return "", nil
}
// SimplifyOptions removes options in loc that it would inherit
// by default from its parent.
func (loc ID) SimplifyOptions() ID {
// TODO: implement
return ID{}
}
// Language is an ISO 639 language identifier.
type Language struct {
langID
}
// Scope returns a Set of all pre-defined sublocales for this language.
func (l Language) Scope() Set {
// TODO: implement
return nil
}
// Script is a 4-letter ISO 15924 code for representing scripts.
// It is idiomatically represented in title case.
type Script struct {
scriptID
}
// Scope returns a Set of all pre-defined sublocales applicable to the script.
func (s Script) Scope() Set {
// TODO: implement
return nil
}
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
type Region struct {
regionID
}
// IsCountry returns whether this region is a country.
func (r Region) IsCountry() bool {
// TODO: implement
return true
}
// Scope returns a Set of all pre-defined sublocales applicable to the region.
func (r Region) Scope() Set {
// TODO: implement
return nil
}
// Variant represents a registered variant of a language as defined by BCP 47.
type Variant struct {
// TODO: implement
variant string
}
// String returns the string representation of the variant.
func (v Variant) String() string {
// TODO: implement
return v.variant
}
// Currency is an ISO 4217 currency designator.
type Currency struct {
currencyID
}
// Set provides information about a set of locales.
type Set interface {
Locales() []ID
Languages() []Language
Regions() []Region
Scripts() []Script
Currencies() []Currency
}