unicode/bidi/core.go - text - Git at Google

 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package bidi

 import "log"

 // This implementation is a port of the Java reference implementation of the
 // Unicode Bidirectional Algorithm (UAX #9), which can be found at:
 // http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
 //
 // Input:
 // There are two levels of input to the algorithm, since clients may prefer to
 // supply some information from out-of-band sources rather than relying on the
 // default behavior.
 //
 // - Bidi class array
 // - Bidi class array, with externally supplied base line direction
 //
 // Output:
 // Output is separated into several stages:
 //
 //  - levels array over entire paragraph
 //  - reordering array over entire paragraph
 //  - levels array over line
 //  - reordering array over line
 //
 // Note that for conformance to the Unicode Bidirectional Algorithm,
 // implementations are only required to generate correct reordering and
 // character directionality (odd or even levels) over a line. Generating
 // identical level arrays over a line is not required. Bidi explicit format
 // codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
 // positions as long as the rest of the input is properly reordered.
 //
 // As the algorithm is defined to operate on a single paragraph at a time, this
 // implementation is written to handle single paragraphs. Thus rule P1 is
 // presumed by this implementation-- the data provided to the implementation is
 // assumed to be a single paragraph, and either contains no 'B' codes, or a
 // single 'B' code at the end of the input. 'B' is allowed as input to
 // illustrate how the algorithm assigns it a level.
 //
 // Also note that rules L3 and L4 depend on the rendering engine that uses the
 // result of the bidi algorithm. This implementation assumes that the rendering
 // engine expects combining marks in visual order (e.g. to the left of their
 // base character in RTL runs) and that it adjusts the glyphs used to render
 // mirrored characters that are in RTL runs so that they render appropriately.

 // level is the embedding level of a character. Even embedding levels indicate
 // left-to-right order and odd levels indicate right-to-left order. The special
 // level of -1 is reserved for undefined order.
 type level int8

 // implicitLevel is the reserved "undefined" level. When a paragraph is
 // created with this value as its embedding level, paragraph.run derives the
 // level from the text instead of using an externally supplied one.
 const implicitLevel level = -1

 // in reports whether c is equal to at least one of the classes in set. It
 // returns false for an empty set.
 func (c Class) in(set ...Class) bool {
 	for i := 0; i < len(set); i++ {
 		if set[i] == c {
 			return true
 		}
 	}
 	return false
 }

 // A paragraph contains the state of a paragraph: the caller-supplied input
 // (types, bracket information, embedding level) and the arrays that the
 // algorithm fills in while it runs.
 type paragraph struct {
 	// initialTypes is a private copy of the Bidi classes passed to
 	// newParagraph. It is never modified by the algorithm.
 	initialTypes []Class

 	// Arrays of properties needed for paired bracket evaluation in N0
 	pairTypes  []bracketType // paired bracket type (bpNone, bpOpen or bpClose) of each rune
 	pairValues []rune        // rune identifying the bracket pair for bpOpen and bpClose; 0 for bpNone

 	// embeddingLevel is the paragraph embedding level. If it is implicitLevel
 	// on entry to run, it is replaced by the level determined from the text.
 	embeddingLevel level

 	// Working/result arrays, one entry per input character. resultTypes
 	// starts out as a copy of the input types and is rewritten as the rules
 	// are applied; resultLevels is allocated by run.
 	resultTypes  []Class
 	resultLevels []level

 	// Index of matching PDI for isolate initiator characters. For other
 	// characters, the value of matchingPDI will be set to -1. For isolate
 	// initiators with no matching PDI, matchingPDI will be set to the length of
 	// the input string.
 	matchingPDI []int

 	// Index of matching isolate initiator for PDI characters. For other
 	// characters, and for PDIs with no matching isolate initiator, the value of
 	// matchingIsolateInitiator will be set to -1.
 	matchingIsolateInitiator []int
 }

 // newParagraph validates its arguments, builds a paragraph from them and runs
 // the paragraph-level part of the algorithm on it.
 //
 // types holds the Unicode BiDi class of each rune of the preprocessed text.
 // pairTypes holds the bracket type of each rune and pairValues a bracket
 // class identifier for each rune (suggested: the rune of the opening bracket,
 // after normalization, for both the opening and the matching closing
 // bracket). levels is the paragraph embedding level: 0, 1, or implicitLevel
 // to have it determined from the text.
 //
 // Invalid input currently causes a panic in one of the validate functions.
 //
 // TODO: return an error.
 func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
 	validateTypes(types)
 	validatePbTypes(pairTypes)
 	validatePbValues(pairValues, pairTypes)
 	validateParagraphEmbeddingLevel(levels)

 	// Keep two independent copies of the input classes: one that stays
 	// untouched and one that the rules rewrite in place.
 	initial := append([]Class(nil), types...)
 	working := append([]Class(nil), types...)

 	p := new(paragraph)
 	p.initialTypes = initial
 	p.resultTypes = working
 	p.embeddingLevel = levels
 	p.pairTypes = pairTypes
 	p.pairValues = pairValues

 	p.run()
 	return p
 }

 // Len returns the number of characters in the paragraph, i.e. the length of
 // the initial types array.
 func (p *paragraph) Len() int {
 	n := len(p.initialTypes)
 	return n
 }

 // run executes the paragraph-level part of the algorithm. It does not include
 // the line-based processing (rules L1, L2), which is applied later in the
 // line-based phase.
 func (p *paragraph) run() {
 	p.determineMatchingIsolates()

 	// 1) Paragraph level (rules P2, P3). Rule P1 is a precondition of this
 	// implementation. The default is only computed when the caller did not
 	// supply a level.
 	if implicitLevel == p.embeddingLevel {
 		p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
 	}

 	// Every character starts out at the paragraph embedding level.
 	levels := make([]level, p.Len())
 	setLevels(levels, p.embeddingLevel)
 	p.resultLevels = levels

 	// 2) Explicit levels and directions (rules X1-X8).
 	p.determineExplicitEmbeddingLevels()

 	// Rule X9: embeddings, overrides, PDFs and BNs are not physically
 	// removed; they are simply never copied into an isolating run sequence,
 	// which removes them for all practical purposes.

 	// Rule X10: the rest of the algorithm works on one isolating run
 	// sequence at a time.
 	sequences := p.determineIsolatingRunSequences()
 	for i := range sequences {
 		seq := sequences[i]

 		// 3) Weak types (rules W1-W7).
 		seq.resolveWeakTypes()

 		// 4a) Paired brackets (rule N0).
 		resolvePairedBrackets(seq)

 		// 4b) Neutral types (rules N1-N3).
 		seq.resolveNeutralTypes()

 		// 5) Implicit embedding levels (rules I1, I2).
 		seq.resolveImplicitLevels()

 		// Write the sequence's results back into the paragraph.
 		seq.applyLevelsAndTypes()
 	}

 	// Give the characters hidden by X9 (LRE, RLE, LRO, RLO, PDF, BN) a level
 	// as well, so the level array has a value for every character.
 	p.assignLevelsToCharactersRemovedByX9()
 }

 // determineMatchingIsolates pairs up isolate initiators (LRI, RLI, FSI) with
 // their PDIs, as described by definition BD9.
 //
 // On return:
 //
 //  - matchingPDI[i] is the index of the PDI matching the isolate initiator at
 //    i, the length of the text if that initiator has no matching PDI, and -1
 //    if the character at i is not an isolate initiator.
 //  - matchingIsolateInitiator[j] is the index of the isolate initiator that
 //    matches the PDI at j, and -1 if the character at j is not a PDI or is a
 //    PDI without a matching initiator.
 func (p *paragraph) determineMatchingIsolates() {
 	n := p.Len()
 	p.matchingPDI = make([]int, n)
 	p.matchingIsolateInitiator = make([]int, n)

 	for i := 0; i < n; i++ {
 		p.matchingPDI[i] = -1
 		p.matchingIsolateInitiator[i] = -1
 	}

 	for i := 0; i < n; i++ {
 		if !p.resultTypes[i].in(LRI, RLI, FSI) {
 			continue
 		}

 		// Scan forward, tracking the nesting depth, until the PDI that
 		// closes this initiator is found. If none is found the match stays
 		// at the length of the text.
 		match := n
 		depth := 1
 		for j := i + 1; j < n && depth > 0; j++ {
 			switch u := p.resultTypes[j]; {
 			case u.in(LRI, RLI, FSI):
 				depth++
 			case u == PDI:
 				depth--
 				if depth == 0 {
 					match = j
 				}
 			}
 		}

 		p.matchingPDI[i] = match
 		if match < n {
 			p.matchingIsolateInitiator[match] = i
 		}
 	}
 }

 // determineParagraphEmbeddingLevel reports the resolved paragraph direction of
 // the characters in the range [start, end): 0 if the first strong character
 // outside any isolate is L or if there is no such character, 1 if it is AL or
 // R.
 //
 // This implements rules P2 and P3. It is also used for rule X5c to decide
 // whether an FSI behaves as an LRI or an RLI.
 func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
 	for i := start; i < end; i++ {
 		switch t := p.resultTypes[i]; {
 		case t == L:
 			// Rule P3: first strong character is left-to-right.
 			return 0
 		case t == AL, t == R:
 			// Rule P3: first strong character is right-to-left.
 			return 1
 		case t.in(FSI, LRI, RLI):
 			// Rule P2: ignore everything up to the matching PDI.
 			i = p.matchingPDI[i]
 			if i > end {
 				log.Panic("assert (i <= end)")
 			}
 		}
 	}
 	// No strong character found: the default embedding level is 0.
 	return 0
 }

 // maxDepth is the highest embedding level that
 // determineExplicitEmbeddingLevels accepts for a new embedding, override or
 // isolate. It also determines the capacity (maxDepth + 1 entries) of the
 // directional status stack.
 const maxDepth = 125

 // directionalStatusStack is the fixed-capacity stack used while resolving
 // explicit embedding levels. Each entry consists of an embedding level, a
 // directional override status and a directional isolate status, stored in
 // three parallel arrays. stackCounter is the number of entries in use; the
 // top entry is at index stackCounter-1.
 type directionalStatusStack struct {
 	stackCounter        int
 	embeddingLevelStack [maxDepth + 1]level
 	overrideStatusStack [maxDepth + 1]Class
 	isolateStatusStack  [maxDepth + 1]bool
 }

 // empty discards all entries of the stack.
 func (s *directionalStatusStack) empty() {
 	s.stackCounter = 0
 }

 // pop removes the top entry. It does not check whether the stack is empty.
 func (s *directionalStatusStack) pop() {
 	s.stackCounter -= 1
 }

 // depth returns the number of entries currently on the stack.
 func (s *directionalStatusStack) depth() int {
 	return s.stackCounter
 }

 // push adds a new entry with the given embedding level, directional override
 // status and directional isolate status on top of the stack. It does not
 // check for overflow.
 func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
 	top := s.stackCounter
 	s.stackCounter = top + 1

 	s.embeddingLevelStack[top] = level
 	s.overrideStatusStack[top] = overrideStatus
 	s.isolateStatusStack[top] = isolateStatus
 }

 // lastEmbeddingLevel returns the embedding level of the top entry. The stack
 // must not be empty.
 func (s *directionalStatusStack) lastEmbeddingLevel() level {
 	top := s.stackCounter - 1
 	return s.embeddingLevelStack[top]
 }

 // lastDirectionalOverrideStatus returns the directional override status of
 // the top entry. The stack must not be empty.
 func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
 	top := s.stackCounter - 1
 	return s.overrideStatusStack[top]
 }

 // lastDirectionalIsolateStatus returns the directional isolate status of the
 // top entry. The stack must not be empty.
 func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
 	top := s.stackCounter - 1
 	return s.isolateStatusStack[top]
 }

 // determineExplicitEmbeddingLevels determines explicit levels using rules
 // X1 - X8.
 //
 // It makes a single pass over p.resultTypes, maintaining a directional status
 // stack plus the overflow and valid isolate counters, and stores the
 // resulting embedding level of each character in p.resultLevels. Where a
 // directional override is in effect (the override status on top of the stack
 // is not ON), it also replaces the entry in p.resultTypes with the override
 // class; this is done for isolate initiators and for all characters handled
 // by the default case.
 func (p *paragraph) determineExplicitEmbeddingLevels() {
 	var stack directionalStatusStack
 	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int

 	// Rule X1.
 	// The bottom entry carries the paragraph level, no override (ON) and no
 	// isolate status.
 	stack.push(p.embeddingLevel, ON, false)

 	for i, t := range p.resultTypes {
 		// Rules X2, X3, X4, X5, X5a, X5b, X5c
 		switch t {
 		case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
 			isIsolate := t.in(RLI, LRI, FSI)
 			isRTL := t.in(RLE, RLO, RLI)

 			// override if this is an FSI that resolves to RLI
 			if t == FSI {
 				isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
 			}
 			// An isolate initiator itself takes the level (and, if any, the
 			// override) of the enclosing context, not of the isolate it
 			// opens.
 			if isIsolate {
 				p.resultLevels[i] = stack.lastEmbeddingLevel()
 				if stack.lastDirectionalOverrideStatus() != ON {
 					p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
 				}
 			}

 			var newLevel level
 			if isRTL {
 				// least greater odd
 				newLevel = (stack.lastEmbeddingLevel() + 1) | 1
 			} else {
 				// least greater even
 				newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
 			}

 			// The new level is only valid if it does not exceed maxDepth and
 			// no overflow is currently pending.
 			if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
 				if isIsolate {
 					validIsolateCount++
 				}
 				// Push new embedding level, override status, and isolated
 				// status.
 				// No check for valid stack counter, since the level check
 				// suffices.
 				switch t {
 				case LRO:
 					stack.push(newLevel, L, isIsolate)
 				case RLO:
 					stack.push(newLevel, R, isIsolate)
 				default:
 					stack.push(newLevel, ON, isIsolate)
 				}
 				// Not really part of the spec
 				if !isIsolate {
 					p.resultLevels[i] = newLevel
 				}
 			} else {
 				// This is an invalid explicit formatting character,
 				// so apply the "Otherwise" part of rules X2-X5b.
 				if isIsolate {
 					overflowIsolateCount++
 				} else { // !isIsolate
 					if overflowIsolateCount == 0 {
 						overflowEmbeddingCount++
 					}
 				}
 			}

 		// Rule X6a
 		case PDI:
 			if overflowIsolateCount > 0 {
 				// This PDI closes an overflowed isolate initiator.
 				overflowIsolateCount--
 			} else if validIsolateCount == 0 {
 				// do nothing
 			} else {
 				// This PDI closes a valid isolate: drop any embeddings and
 				// overrides opened inside it, then the isolate entry itself.
 				overflowEmbeddingCount = 0
 				for !stack.lastDirectionalIsolateStatus() {
 					stack.pop()
 				}
 				stack.pop()
 				validIsolateCount--
 			}
 			p.resultLevels[i] = stack.lastEmbeddingLevel()

 		// Rule X7
 		case PDF:
 			// Not really part of the spec
 			p.resultLevels[i] = stack.lastEmbeddingLevel()

 			if overflowIsolateCount > 0 {
 				// do nothing
 			} else if overflowEmbeddingCount > 0 {
 				overflowEmbeddingCount--
 			} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
 				// Only pop an embedding/override entry, and never the bottom
 				// (paragraph) entry.
 				stack.pop()
 			}

 		case B: // paragraph separator.
 			// Rule X8.

 			// These values are reset for clarity, in this implementation B
 			// can only occur as the last code in the array.
 			stack.empty()
 			overflowIsolateCount = 0
 			overflowEmbeddingCount = 0
 			validIsolateCount = 0
 			p.resultLevels[i] = p.embeddingLevel

 		default:
 			// All other characters take the current embedding level and, if
 			// an override is in effect, the override class.
 			p.resultLevels[i] = stack.lastEmbeddingLevel()
 			if stack.lastDirectionalOverrideStatus() != ON {
 				p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
 			}
 		}
 	}
 }

 // An isolatingRunSequence holds the working state for one isolating run
 // sequence of a paragraph. The weak, neutral and implicit rules operate on
 // the sequence's own types and resolvedLevels slices; applyLevelsAndTypes
 // copies the results back into the paragraph.
 type isolatingRunSequence struct {
 	p *paragraph // paragraph this sequence belongs to

 	indexes []int // indexes to the original string

 	types          []Class // type of each character using the index
 	resolvedLevels []level // resolved levels after application of rules
 	level          level   // embedding level of the sequence (level of its first character)
 	sos, eos       Class   // start-of-sequence and end-of-sequence types, each L or R
 }

 // Len returns the number of characters in the isolating run sequence.
 func (i *isolatingRunSequence) Len() int {
 	n := len(i.indexes)
 	return n
 }

 // maxLevel returns the larger of the two levels a and b.
 func maxLevel(a, b level) level {
 	if b < a {
 		return a
 	}
 	return b
 }

 // isolatingRunSequence builds the isolatingRunSequence for the characters at
 // the given indexes, which must not be empty.
 //
 // Rule X10, second bullet: it determines the start-of-sequence (sos) and
 // end-of-sequence (eos) types, either L or R, from the higher of the
 // sequence's own level and the level of the nearest character before
 // (respectively after) the sequence that is not removed by X9. The paragraph
 // embedding level is used when there is no such character, and also for eos
 // when the sequence ends in an isolate initiator.
 func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
 	n := len(indexes)
 	first, last := indexes[0], indexes[n-1]

 	seqTypes := make([]Class, 0, n)
 	for _, x := range indexes {
 		seqTypes = append(seqTypes, p.resultTypes[x])
 	}

 	// Level of the closest preceding character that survives X9.
 	before := p.embeddingLevel
 	for k := first - 1; k >= 0; k-- {
 		if !isRemovedByX9(p.initialTypes[k]) {
 			before = p.resultLevels[k]
 			break
 		}
 	}

 	// Level of the closest following character that survives X9, unless the
 	// sequence ends with an isolate initiator.
 	after := p.embeddingLevel
 	if !seqTypes[n-1].in(LRI, RLI, FSI) {
 		for k := last + 1; k < p.Len(); k++ {
 			if !isRemovedByX9(p.initialTypes[k]) {
 				after = p.resultLevels[k]
 				break
 			}
 		}
 	}

 	seqLevel := p.resultLevels[first]
 	seq := &isolatingRunSequence{
 		p:       p,
 		indexes: indexes,
 		types:   seqTypes,
 		level:   seqLevel,
 	}
 	seq.sos = typeForLevel(maxLevel(before, seqLevel))
 	seq.eos = typeForLevel(maxLevel(after, seqLevel))
 	return seq
 }

 // resolveWeakTypes applies rules W1-W7 to the types of the isolating run
 // sequence, rewriting s.types in place.
 //
 // Note that some weak types (EN, AN) remain after this processing is
 // complete.
 func (s *isolatingRunSequence) resolveWeakTypes() {

 	// on entry, only these types remain
 	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)

 	// Rule W1.
 	// Changes all NSMs: an NSM takes the type of the preceding character, or
 	// ON if the preceding character is an isolate initiator or PDI. At the
 	// start of the sequence it takes sos.
 	precedingCharacterType := s.sos
 	for i, t := range s.types {
 		switch {
 		case t == NSM:
 			s.types[i] = precedingCharacterType
 		case t.in(LRI, RLI, FSI, PDI):
 			precedingCharacterType = ON
 		default:
 			precedingCharacterType = t
 		}
 	}

 	// Rule W2.
 	// EN does not change at the start of the run, because sos != AL.
 	for i, t := range s.types {
 		if t == EN {
 			// Search backwards for the nearest strong type.
 			for j := i - 1; j >= 0; j-- {
 				if t := s.types[j]; t.in(L, R, AL) {
 					if t == AL {
 						s.types[i] = AN
 					}
 					break
 				}
 			}
 		}
 	}

 	// Rule W3.
 	for i, t := range s.types {
 		if t == AL {
 			s.types[i] = R
 		}
 	}

 	// Rule W4.
 	// Since there must be values on both sides for this rule to have an
 	// effect, the scan skips the first and last value.
 	//
 	// Although the scan proceeds left to right, and changes the type
 	// values in a way that would appear to affect the computations
 	// later in the scan, there is actually no problem. A change in the
 	// current value can only affect the value to its immediate right,
 	// and only affect it if it is ES or CS. But the current value can
 	// only change if the value to its right is not ES or CS. Thus
 	// either the current value will not change, or its change will have
 	// no effect on the remainder of the analysis.

 	for i := 1; i < s.Len()-1; i++ {
 		t := s.types[i]
 		if t == ES || t == CS {
 			prevSepType := s.types[i-1]
 			succSepType := s.types[i+1]
 			if prevSepType == EN && succSepType == EN {
 				s.types[i] = EN
 			} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
 				s.types[i] = AN
 			}
 		}
 	}

 	// Rule W5.
 	// An index loop is used (rather than range) so that the scan can really
 	// jump past a run of ETs once it has been handled; otherwise every ET of
 	// an unchanged run would trigger a rescan of the rest of the run.
 	for i := 0; i < len(s.types); i++ {
 		if s.types[i] != ET {
 			continue
 		}
 		// locate end of sequence
 		runStart := i
 		runEnd := s.findRunLimit(runStart, ET)

 		// check values at ends of sequence
 		t := s.sos
 		if runStart > 0 {
 			t = s.types[runStart-1]
 		}
 		if t != EN {
 			t = s.eos
 			if runEnd < len(s.types) {
 				t = s.types[runEnd]
 			}
 		}
 		if t == EN {
 			setTypes(s.types[runStart:runEnd], EN)
 		}
 		// continue at end of sequence (the loop increment moves to runEnd)
 		i = runEnd - 1
 	}

 	// Rule W6.
 	for i, t := range s.types {
 		if t.in(ES, ET, CS) {
 			s.types[i] = ON
 		}
 	}

 	// Rule W7.
 	for i, t := range s.types {
 		if t != EN {
 			continue
 		}
 		// set default if we reach start of run
 		prevStrongType := s.sos
 		for j := i - 1; j >= 0; j-- {
 			if u := s.types[j]; u == L || u == R { // AL's have been changed to R
 				prevStrongType = u
 				break
 			}
 		}
 		if prevStrongType == L {
 			s.types[i] = L
 		}
 	}
 }

 // resolveNeutralTypes applies rules N1 and N2 to the isolating run sequence:
 // every maximal run of neutrals and isolate formatting characters (B, S, WS,
 // ON, RLI, LRI, FSI, PDI) in s.types is replaced by a strong type (L or R).
 func (s *isolatingRunSequence) resolveNeutralTypes() {

 	// on entry, only these types can be in resultTypes
 	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)

 	// An index loop is used (rather than range) so that the scan can jump
 	// past a run once it has been resolved.
 	for i := 0; i < len(s.types); i++ {
 		switch s.types[i] {
 		case WS, ON, B, S, RLI, LRI, FSI, PDI:
 			// find bounds of run of neutrals
 			runStart := i
 			runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)

 			// determine effective types at ends of run
 			var leadType, trailType Class

 			// Note that the character found can only be L, R, AN, or
 			// EN. AN and EN count as R for the purpose of N1.
 			if runStart == 0 {
 				leadType = s.sos
 			} else {
 				leadType = s.types[runStart-1]
 				if leadType.in(AN, EN) {
 					leadType = R
 				}
 			}
 			if runEnd == len(s.types) {
 				trailType = s.eos
 			} else {
 				trailType = s.types[runEnd]
 				if trailType.in(AN, EN) {
 					trailType = R
 				}
 			}

 			var resolvedType Class
 			if leadType == trailType {
 				// Rule N1.
 				resolvedType = leadType
 			} else {
 				// Rule N2.
 				// Notice the embedding level of the run is used, not
 				// the paragraph embedding level.
 				resolvedType = typeForLevel(s.level)
 			}

 			setTypes(s.types[runStart:runEnd], resolvedType)

 			// skip over run of (former) neutrals (the loop increment moves
 			// to runEnd)
 			i = runEnd - 1
 		}
 	}
 }

 // setLevels overwrites every element of levels with newLevel.
 func setLevels(levels []level, newLevel level) {
 	for i := 0; i < len(levels); i++ {
 		levels[i] = newLevel
 	}
 }

 // setTypes overwrites every element of types with newType.
 func setTypes(types []Class, newType Class) {
 	for i := 0; i < len(types); i++ {
 		types[i] = newType
 	}
 }

 // resolveImplicitLevels applies rules I1 and I2: it allocates
 // s.resolvedLevels, starts every character at the level of the sequence and
 // raises the level according to the character's resolved type.
 func (s *isolatingRunSequence) resolveImplicitLevels() {

 	// on entry, only these types can be in resultTypes
 	s.assertOnly(L, R, EN, AN)

 	resolved := make([]level, len(s.types))
 	setLevels(resolved, s.level)
 	s.resolvedLevels = resolved

 	if s.level&1 != 0 {
 		// Rule I2 (odd level): everything except R goes up one level.
 		for i := range s.types {
 			if s.types[i] != R {
 				resolved[i]++
 			}
 		}
 		return
 	}

 	// Rule I1 (even level): R goes up one level, AN and EN go up two, L
 	// stays where it is.
 	for i, t := range s.types {
 		switch t {
 		case L:
 			// no change
 		case R:
 			resolved[i]++
 		default: // AN or EN
 			resolved[i] += 2
 		}
 	}
 }

 // applyLevelsAndTypes copies the types and levels resolved for this sequence
 // (rules W1-I2) back into the resultTypes and resultLevels arrays of the
 // paragraph, at the positions given by s.indexes.
 func (s *isolatingRunSequence) applyLevelsAndTypes() {
 	para := s.p
 	for k := 0; k < len(s.indexes); k++ {
 		pos := s.indexes[k]
 		para.resultTypes[pos] = s.types[k]
 		para.resultLevels[pos] = s.resolvedLevels[k]
 	}
 }

 // findRunLimit returns the limit (exclusive end) of the run of types that
 // starts at index and consists only of types in validSet. The value at index
 // itself is checked as well, so index is returned if that value is not in
 // validSet. If the run extends to the end of the sequence, the length of the
 // sequence is returned.
 func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
 	for ; index < len(s.types); index++ {
 		member := false
 		for k := 0; k < len(validSet); k++ {
 			if validSet[k] == s.types[index] {
 				member = true
 				break
 			}
 		}
 		if !member {
 			return index // first type outside validSet
 		}
 	}
 	return len(s.types)
 }

 // assertOnly is an internal consistency check of the algorithm: it panics if
 // any type in the sequence is not one of the given codes.
 func (s *isolatingRunSequence) assertOnly(codes ...Class) {
 	for i := 0; i < len(s.types); i++ {
 		t := s.types[i]
 		allowed := false
 		for k := 0; k < len(codes); k++ {
 			if codes[k] == t {
 				allowed = true
 				break
 			}
 		}
 		if !allowed {
 			log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
 		}
 	}
 }

 // determineLevelRuns splits the paragraph into level runs and returns them in
 // order. Each level run is a slice of indexes into the input string whose
 // characters share the same level in p.resultLevels.
 //
 // Rule X9 is applied here by leaving out the characters it removes, so they
 // neither appear in a run nor split one.
 func (p *paragraph) determineLevelRuns() [][]int {
 	runs := make([][]int, 0)
 	current := make([]int, 0)
 	prev := implicitLevel // level of the run being built; implicitLevel before the first run

 	for i, t := range p.initialTypes {
 		if isRemovedByX9(t) {
 			continue
 		}
 		if lvl := p.resultLevels[i]; lvl != prev {
 			// A new run starts here. Close the previous one, if there was
 			// one.
 			if prev >= 0 {
 				runs = append(runs, current)
 				current = nil
 			}
 			prev = lvl
 		}
 		current = append(current, i)
 	}

 	// Close the final run, if any.
 	if len(current) > 0 {
 		runs = append(runs, current)
 	}
 	return runs
 }

 // determineIsolatingRunSequences groups the level runs of the paragraph into
 // isolating run sequences, following definition BD13.
 //
 // A sequence starts at every level run that does not begin with a PDI that
 // has a matching isolate initiator. It is extended, for as long as its last
 // character is an isolate initiator with a matching PDI, by the level run
 // containing that PDI.
 func (p *paragraph) determineIsolatingRunSequences() []*isolatingRunSequence {
 	levelRuns := p.determineLevelRuns()

 	// Map every character to the number of the level run that contains it.
 	runOf := make([]int, p.Len())
 	for n := range levelRuns {
 		for _, pos := range levelRuns[n] {
 			runOf[pos] = n
 		}
 	}

 	result := []*isolatingRunSequence{}

 	for _, run := range levelRuns {
 		// Runs starting with a matched PDI are continuations of an earlier
 		// sequence and never start one themselves.
 		if head := run[0]; p.initialTypes[head] == PDI && p.matchingIsolateInitiator[head] != -1 {
 			continue
 		}

 		var indexes []int
 		for next := run; ; {
 			// Copy this level run into the sequence.
 			indexes = append(indexes, next...)

 			tail := indexes[len(indexes)-1]
 			if !p.initialTypes[tail].in(LRI, RLI, FSI) || p.matchingPDI[tail] == p.Len() {
 				break
 			}
 			// Continue with the run that holds the matching PDI.
 			next = levelRuns[runOf[p.matchingPDI[tail]]]
 		}
 		result = append(result, p.isolatingRunSequence(indexes))
 	}
 	return result
 }

 // assignLevelsToCharactersRemovedByX9 gives the characters removed by rule X9
 // (LRE, RLE, LRO, RLO, PDF, BN) their initial type back and assigns them a
 // level, which makes it easier to relate the level information to the
 // original input. The levels are arbitrary; they are chosen so that no level
 // run is broken.
 func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
 	levels := p.resultLevels

 	// Mark the removed characters with -1 first.
 	for i, t := range p.initialTypes {
 		switch {
 		case t.in(LRE, RLE, LRO, RLO, PDF, BN):
 			p.resultTypes[i] = t
 			levels[i] = -1
 		}
 	}

 	// Then propagate the levels forward. Propagating backward would work
 	// too; what matters is that no level break is introduced where there was
 	// none. A marked first character gets the paragraph embedding level.
 	if levels[0] == -1 {
 		levels[0] = p.embeddingLevel
 	}
 	for i := 1; i < len(p.initialTypes); i++ {
 		if levels[i] != -1 {
 			continue
 		}
 		levels[i] = levels[i-1]
 	}
 	// Embedding information is for informational purposes only so need not be
 	// adjusted.
 }

 //
 // Output
 //

 // getLevels returns a copy of the paragraph's levels array with rule L1
 // applied for lines broken at the offsets in linebreaks.
 //
 // The linebreaks array must include at least one value. The values must be
 // in strictly increasing order (no duplicates) between 1 and the length of
 // the text, inclusive. The last value must be the length of the text.
 func (p *paragraph) getLevels(linebreaks []int) []level {
 	// Rule L1 refers to the types the characters had before any processing
 	// (including processing of overrides), so the initial types are used
 	// here rather than the result types.
 	//
 	// The explicit format codes and BN have been given levels again so that
 	// the levels array corresponds to the initial text; their final placement
 	// is not normative. isWhitespace treats them like WS, so they do not
 	// interrupt sequences of WS.

 	validateLineBreaks(linebreaks, p.Len())

 	out := append([]level(nil), p.resultLevels...)

 	// resetTrailing walks backwards from index from, but not below floor,
 	// and resets whitespace (including format codes) to the paragraph
 	// embedding level until the first non-whitespace character.
 	resetTrailing := func(from, floor int) {
 		for k := from; k >= floor && isWhitespace(p.initialTypes[k]); k-- {
 			out[k] = p.embeddingLevel
 		}
 	}

 	// Line breaks need no special attention here: if a line breaks inside a
 	// series of WS preceding S, the line break itself causes the reset.
 	for i, t := range p.initialTypes {
 		if !t.in(B, S) {
 			continue
 		}
 		// Rule L1, clauses one and two.
 		out[i] = p.embeddingLevel

 		// Rule L1, clause three.
 		resetTrailing(i-1, 0)
 	}

 	// Rule L1, clause four.
 	lineStart := 0
 	for _, lineEnd := range linebreaks {
 		resetTrailing(lineEnd-1, lineStart)
 		lineStart = lineEnd
 	}

 	return out
 }

 // getReordering returns the reordering of lines from a visual index to a
 // logical index for line breaks at the given offsets.
 //
 // Lines are concatenated from left to right. For example, the fifth character
 // from the left on the third line is
 //
 // 		getReordering(linebreaks)[linebreaks[1] + 4]
 //
 // because linebreaks[1] is the position after the last character of the
 // second line, which is also the index of the first character on the third
 // line, and adding four gets the fifth character from the left.
 //
 // The linebreaks array must include at least one value. The values must be
 // in strictly increasing order (no duplicates) between 1 and the length of
 // the text, inclusive. The last value must be the length of the text.
 func (p *paragraph) getReordering(linebreaks []int) []int {
 	validateLineBreaks(linebreaks, p.Len())

 	lineLevels := p.getLevels(linebreaks)
 	return computeMultilineReordering(lineLevels, linebreaks)
 }

 // computeMultilineReordering returns the multiline reordering array for the
 // given levels array. Each line, delimited by the offsets in linebreaks, is
 // reordered on its own; reordering does not occur across a line break.
 func computeMultilineReordering(levels []level, linebreaks []int) []int {
 	reordered := make([]int, len(levels))

 	lineStart := 0
 	for _, lineEnd := range linebreaks {
 		// Reorder a private copy of this line's levels.
 		lineLevels := make([]level, lineEnd-lineStart)
 		copy(lineLevels, levels[lineStart:])

 		lineOrder := computeReordering(lineLevels)
 		for k := range lineOrder {
 			// Translate line-relative positions to paragraph positions.
 			reordered[lineStart+k] = lineOrder[k] + lineStart
 		}
 		lineStart = lineEnd
 	}
 	return reordered
 }

 // computeReordering returns the reordering array for the levels of a single
 // line (rule L2). The result maps visual positions to logical positions: the
 // leftmost character of the line is the one at index order[0] of the line.
 func computeReordering(levels []level) []int {
 	n := len(levels)

 	// Start with the identity order.
 	order := make([]int, n)
 	for i := 0; i < n; i++ {
 		order[i] = i
 	}

 	// Find the highest level and the lowest odd level on the line. The rules
 	// talk about the text, but as nothing is reordered across line bounds
 	// looking at the line is sufficient.
 	top := level(0)
 	minOdd := level(maxDepth + 2)
 	for _, l := range levels {
 		if l > top {
 			top = l
 		}
 		if l&1 != 0 && l < minOdd {
 			minOdd = l
 		}
 	}

 	// From the highest level down to the lowest odd level, reverse every
 	// maximal stretch of characters at or above the current level.
 	for cur := top; cur >= minOdd; cur-- {
 		i := 0
 		for i < n {
 			if levels[i] < cur {
 				i++
 				continue
 			}
 			// levels[i:end] is the stretch at or above cur.
 			end := i + 1
 			for end < n && levels[end] >= cur {
 				end++
 			}
 			for lo, hi := i, end-1; lo < hi; lo, hi = lo+1, hi-1 {
 				order[lo], order[hi] = order[hi], order[lo]
 			}
 			// The character at end (if any) is below cur, so it can be
 			// skipped as well.
 			i = end + 1
 		}
 	}

 	return order
 }

 // isWhitespace reports whether the type is considered a whitespace type for
 // the line break rules. Besides WS this includes the explicit formatting
 // codes, the isolate formatting codes and BN.
 func isWhitespace(c Class) bool {
 	return c == LRE || c == RLE || c == LRO || c == RLO || c == PDF ||
 		c == LRI || c == RLI || c == FSI || c == PDI ||
 		c == BN || c == WS
 }

 // isRemovedByX9 reports whether the type is one of the types removed in X9:
 // LRE, RLE, LRO, RLO, PDF or BN.
 func isRemovedByX9(c Class) bool {
 	return c == LRE || c == RLE || c == LRO || c == RLO || c == PDF || c == BN
 }

 // typeForLevel reports the strong type corresponding to the level: R for an
 // odd level and L for an even one.
 func typeForLevel(level level) Class {
 	if level&1 != 0 {
 		return R
 	}
 	return L
 }

 // TODO: change validation to not panic

 // validateTypes panics if types is empty or if a paragraph separator (B)
 // occurs anywhere but in the last position.
 func validateTypes(types []Class) {
 	if len(types) == 0 {
 		log.Panic("types is null")
 	}
 	// The last element is deliberately not checked: B is allowed there.
 	for i := 0; i < len(types)-1; i++ {
 		if types[i] == B {
 			log.Panicf("B type before end of paragraph at index: %d", i)
 		}
 	}
 }

 // validateParagraphEmbeddingLevel panics unless embeddingLevel is
 // implicitLevel, 0 or 1.
 func validateParagraphEmbeddingLevel(embeddingLevel level) {
 	switch embeddingLevel {
 	case implicitLevel, 0, 1:
 		return
 	}
 	log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
 }

 // validateLineBreaks panics unless the values in linebreaks are positive and
 // strictly increasing and the last one equals textLength. An empty
 // linebreaks slice is only accepted for a textLength of 0.
 func validateLineBreaks(linebreaks []int, textLength int) {
 	last := 0
 	for i := 0; i < len(linebreaks); i++ {
 		cur := linebreaks[i]
 		if cur <= last {
 			log.Panicf("bad linebreak: %d at index: %d", cur, i)
 		}
 		last = cur
 	}
 	if last == textLength {
 		return
 	}
 	log.Panicf("last linebreak was %d, want %d", last, textLength)
 }

 // validatePbTypes panics if pairTypes is empty or contains a value other than
 // bpNone, bpOpen or bpClose.
 func validatePbTypes(pairTypes []bracketType) {
 	if len(pairTypes) == 0 {
 		log.Panic("pairTypes is null")
 	}
 	for i := range pairTypes {
 		if pt := pairTypes[i]; pt != bpNone && pt != bpOpen && pt != bpClose {
 			log.Panicf("illegal pairType value at %d: %v", i, pt)
 		}
 	}
 }

 // validatePbValues panics if pairValues is nil or if its length differs from
 // the length of pairTypes.
 func validatePbValues(pairValues []rune, pairTypes []bracketType) {
 	switch {
 	case pairValues == nil:
 		log.Panic("pairValues is null")
 	case len(pairValues) != len(pairTypes):
 		log.Panic("pairTypes is different length from pairValues")
 	}
 }