| // Copyright 2015 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:generate go run gen.go gen_trieval.go gen_ranges.go |
| |
| // Package bidi contains functionality for bidirectional text support. |
| // |
| // See https://www.unicode.org/reports/tr9. |
| // |
| // NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways |
| // and without notice. |
| package bidi // import "golang.org/x/text/unicode/bidi" |
| |
| // TODO |
| // - Transformer for reordering? |
| // - Transformer (validator, really) for Bidi Rule. |
| |
| import ( |
| "bytes" |
| ) |
| |
| // This API tries to avoid dealing with embedding levels for now. Under the hood |
| // these will be computed, but the question is to which extent the user should |
| // know they exist. We should at some point allow the user to specify an |
| // embedding hierarchy, though. |
| |
| // A Direction indicates the overall flow of text. |
| type Direction int |
| |
| const ( |
| // LeftToRight indicates the text contains no right-to-left characters and |
| // that either there are some left-to-right characters or the option |
| // DefaultDirection(LeftToRight) was passed. |
| LeftToRight Direction = iota |
| |
| // RightToLeft indicates the text contains no left-to-right characters and |
| // that either there are some right-to-left characters or the option |
| // DefaultDirection(RightToLeft) was passed. |
| RightToLeft |
| |
| // Mixed indicates text contains both left-to-right and right-to-left |
| // characters. |
| Mixed |
| |
| // Neutral means that text contains no left-to-right and right-to-left |
| // characters and that no default direction has been set. |
| Neutral |
| ) |
| |
| type options struct { |
| defaultDirection Direction |
| } |
| |
| // An Option is an option for Bidi processing. |
| type Option func(*options) |
| |
| // ICU allows the user to define embedding levels. This may be used, for example, |
| // to use hierarchical structure of markup languages to define embeddings. |
| // The following option may be a way to expose this functionality in this API. |
| // // LevelFunc sets a function that associates nesting levels with the given text. |
| // // The levels function will be called with monotonically increasing values for p. |
| // func LevelFunc(levels func(p int) int) Option { |
| // panic("unimplemented") |
| // } |
| |
| // DefaultDirection sets the default direction for a Paragraph. The direction is |
| // overridden if the text contains directional characters. |
| func DefaultDirection(d Direction) Option { |
| return func(opts *options) { |
| opts.defaultDirection = d |
| } |
| } |
| |
| // A Paragraph holds a single Paragraph for Bidi processing. |
| type Paragraph struct { |
| p []byte |
| o Ordering |
| opts []Option |
| types []Class |
| pairTypes []bracketType |
| pairValues []rune |
| runes []rune |
| options options |
| } |
| |
| // Initialize the p.pairTypes, p.pairValues and p.types from the input previously |
| // set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph |
| // separator (bidi class B). |
| // |
| // The function p.Order() needs these values to be set, so this preparation could be postponed. |
| // But since the SetBytes and SetStrings functions return the length of the input up to the paragraph |
| // separator, the whole input needs to be processed anyway and should not be done twice. |
| // |
| // The function has the same return values as SetBytes() / SetString() |
| func (p *Paragraph) prepareInput() (n int, err error) { |
| p.runes = bytes.Runes(p.p) |
| bytecount := 0 |
| // clear slices from previous SetString or SetBytes |
| p.pairTypes = nil |
| p.pairValues = nil |
| p.types = nil |
| |
| for _, r := range p.runes { |
| props, i := LookupRune(r) |
| bytecount += i |
| cls := props.Class() |
| if cls == B { |
| return bytecount, nil |
| } |
| p.types = append(p.types, cls) |
| if props.IsOpeningBracket() { |
| p.pairTypes = append(p.pairTypes, bpOpen) |
| p.pairValues = append(p.pairValues, r) |
| } else if props.IsBracket() { |
| // this must be a closing bracket, |
| // since IsOpeningBracket is not true |
| p.pairTypes = append(p.pairTypes, bpClose) |
| p.pairValues = append(p.pairValues, r) |
| } else { |
| p.pairTypes = append(p.pairTypes, bpNone) |
| p.pairValues = append(p.pairValues, 0) |
| } |
| } |
| return bytecount, nil |
| } |
| |
| // SetBytes configures p for the given paragraph text. It replaces text |
| // previously set by SetBytes or SetString. If b contains a paragraph separator |
| // it will only process the first paragraph and report the number of bytes |
| // consumed from b including this separator. Error may be non-nil if options are |
| // given. |
| func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) { |
| p.p = b |
| p.opts = opts |
| return p.prepareInput() |
| } |
| |
| // SetString configures s for the given paragraph text. It replaces text |
| // previously set by SetBytes or SetString. If s contains a paragraph separator |
| // it will only process the first paragraph and report the number of bytes |
| // consumed from s including this separator. Error may be non-nil if options are |
| // given. |
| func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) { |
| p.p = []byte(s) |
| p.opts = opts |
| return p.prepareInput() |
| } |
| |
| // IsLeftToRight reports whether the principle direction of rendering for this |
| // paragraphs is left-to-right. If this returns false, the principle direction |
| // of rendering is right-to-left. |
| func (p *Paragraph) IsLeftToRight() bool { |
| return p.Direction() == LeftToRight |
| } |
| |
| // Direction returns the direction of the text of this paragraph. |
| // |
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. |
| func (p *Paragraph) Direction() Direction { |
| return p.o.Direction() |
| } |
| |
| // TODO: what happens if the position is > len(input)? This should return an error. |
| |
| // RunAt reports the Run at the given position of the input text. |
| // |
| // This method can be used for computing line breaks on paragraphs. |
| func (p *Paragraph) RunAt(pos int) Run { |
| c := 0 |
| runNumber := 0 |
| for i, r := range p.o.runes { |
| c += len(r) |
| if pos < c { |
| runNumber = i |
| } |
| } |
| return p.o.Run(runNumber) |
| } |
| |
| func calculateOrdering(levels []level, runes []rune) Ordering { |
| var curDir Direction |
| |
| prevDir := Neutral |
| prevI := 0 |
| |
| o := Ordering{} |
| // lvl = 0,2,4,...: left to right |
| // lvl = 1,3,5,...: right to left |
| for i, lvl := range levels { |
| if lvl%2 == 0 { |
| curDir = LeftToRight |
| } else { |
| curDir = RightToLeft |
| } |
| if curDir != prevDir { |
| if i > 0 { |
| o.runes = append(o.runes, runes[prevI:i]) |
| o.directions = append(o.directions, prevDir) |
| o.startpos = append(o.startpos, prevI) |
| } |
| prevI = i |
| prevDir = curDir |
| } |
| } |
| o.runes = append(o.runes, runes[prevI:]) |
| o.directions = append(o.directions, prevDir) |
| o.startpos = append(o.startpos, prevI) |
| return o |
| } |
| |
| // Order computes the visual ordering of all the runs in a Paragraph. |
| func (p *Paragraph) Order() (Ordering, error) { |
| if len(p.types) == 0 { |
| return Ordering{}, nil |
| } |
| |
| for _, fn := range p.opts { |
| fn(&p.options) |
| } |
| lvl := level(-1) |
| if p.options.defaultDirection == RightToLeft { |
| lvl = 1 |
| } |
| para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl) |
| if err != nil { |
| return Ordering{}, err |
| } |
| |
| levels := para.getLevels([]int{len(p.types)}) |
| |
| p.o = calculateOrdering(levels, p.runes) |
| return p.o, nil |
| } |
| |
| // Line computes the visual ordering of runs for a single line starting and |
| // ending at the given positions in the original text. |
| func (p *Paragraph) Line(start, end int) (Ordering, error) { |
| lineTypes := p.types[start:end] |
| para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1) |
| if err != nil { |
| return Ordering{}, err |
| } |
| levels := para.getLevels([]int{len(lineTypes)}) |
| o := calculateOrdering(levels, p.runes[start:end]) |
| return o, nil |
| } |
| |
| // An Ordering holds the computed visual order of runs of a Paragraph. Calling |
| // SetBytes or SetString on the originating Paragraph invalidates an Ordering. |
| // The methods of an Ordering should only be called by one goroutine at a time. |
| type Ordering struct { |
| runes [][]rune |
| directions []Direction |
| startpos []int |
| } |
| |
| // Direction reports the directionality of the runs. |
| // |
| // The direction may be LeftToRight, RightToLeft, Mixed, or Neutral. |
| func (o *Ordering) Direction() Direction { |
| return o.directions[0] |
| } |
| |
| // NumRuns returns the number of runs. |
| func (o *Ordering) NumRuns() int { |
| return len(o.runes) |
| } |
| |
| // Run returns the ith run within the ordering. |
| func (o *Ordering) Run(i int) Run { |
| r := Run{ |
| runes: o.runes[i], |
| direction: o.directions[i], |
| startpos: o.startpos[i], |
| } |
| return r |
| } |
| |
| // TODO: perhaps with options. |
| // // Reorder creates a reader that reads the runes in visual order per character. |
| // // Modifiers remain after the runes they modify. |
| // func (l *Runs) Reorder() io.Reader { |
| // panic("unimplemented") |
| // } |
| |
| // A Run is a continuous sequence of characters of a single direction. |
| type Run struct { |
| runes []rune |
| direction Direction |
| startpos int |
| } |
| |
| // String returns the text of the run in its original order. |
| func (r *Run) String() string { |
| return string(r.runes) |
| } |
| |
| // Bytes returns the text of the run in its original order. |
| func (r *Run) Bytes() []byte { |
| return []byte(r.String()) |
| } |
| |
| // TODO: methods for |
| // - Display order |
| // - headers and footers |
| // - bracket replacement. |
| |
| // Direction reports the direction of the run. |
| func (r *Run) Direction() Direction { |
| return r.direction |
| } |
| |
| // Pos returns the position of the Run within the text passed to SetBytes or SetString of the |
| // originating Paragraph value. |
| func (r *Run) Pos() (start, end int) { |
| return r.startpos, r.startpos + len(r.runes) - 1 |
| } |
| |
| // AppendReverse reverses the order of characters of in, appends them to out, |
| // and returns the result. Modifiers will still follow the runes they modify. |
| // Brackets are replaced with their counterparts. |
| func AppendReverse(out, in []byte) []byte { |
| ret := make([]byte, len(in)+len(out)) |
| copy(ret, out) |
| inRunes := bytes.Runes(in) |
| |
| for i, r := range inRunes { |
| prop, _ := LookupRune(r) |
| if prop.IsBracket() { |
| inRunes[i] = prop.reverseBracket(r) |
| } |
| } |
| |
| for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 { |
| inRunes[i], inRunes[j] = inRunes[j], inRunes[i] |
| } |
| copy(ret[len(out):], string(inRunes)) |
| |
| return ret |
| } |
| |
| // ReverseString reverses the order of characters in s and returns a new string. |
| // Modifiers will still follow the runes they modify. Brackets are replaced with |
| // their counterparts. |
| func ReverseString(s string) string { |
| input := []rune(s) |
| li := len(input) |
| ret := make([]rune, li) |
| for i, r := range input { |
| prop, _ := LookupRune(r) |
| if prop.IsBracket() { |
| ret[li-i-1] = prop.reverseBracket(r) |
| } else { |
| ret[li-i-1] = r |
| } |
| } |
| return string(ret) |
| } |