src/regexp/onepass.go - go - Git at Google

 // Copyright 2014 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package regexp

 import (
 	"regexp/syntax"
 	"sort"
 	"strings"
 	"unicode"
 )

 // "One-pass" regexp execution.
 // Some regexps can be analyzed to determine that they never need
 // backtracking: they are guaranteed to run in one pass over the string
 // without bothering to save all the usual NFA state.
 // Detect those and execute them more quickly.

 // A onePassProg is a compiled one-pass regular expression program.
 // It is the same as syntax.Prog except for the use of onePassInst.
 type onePassProg struct {
 	Inst   []onePassInst
 	Start  int // index of start instruction
 	NumCap int // number of InstCapture insts in re
 }

 // A onePassInst is a single instruction in a one-pass regular expression program.
 // It is the same as syntax.Inst except for the new 'Next' field.
 type onePassInst struct {
 	syntax.Inst
 	Next []uint32
 }

 // OnePassPrefix returns a literal string that all matches for the
 // regexp must start with. Complete is true if the prefix
 // is the entire match. Pc is the index of the last rune instruction
 // in the string. The OnePassPrefix skips over the mandatory
 // EmptyBeginText
 func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
 	i := &p.Inst[p.Start]
 	if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
 		return "", i.Op == syntax.InstMatch, uint32(p.Start)
 	}
 	pc = i.Out
 	i = &p.Inst[pc]
 	for i.Op == syntax.InstNop {
 		pc = i.Out
 		i = &p.Inst[pc]
 	}
 	// Avoid allocation of buffer if prefix is empty.
 	if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
 		return "", i.Op == syntax.InstMatch, uint32(p.Start)
 	}

 	// Have prefix; gather characters.
 	var buf strings.Builder
 	for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
 		buf.WriteRune(i.Rune[0])
 		pc, i = i.Out, &p.Inst[i.Out]
 	}
 	if i.Op == syntax.InstEmptyWidth &&
 		syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
 		p.Inst[i.Out].Op == syntax.InstMatch {
 		complete = true
 	}
 	return buf.String(), complete, pc
 }

 // OnePassNext selects the next actionable state of the prog, based on the input character.
 // It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine.
 // One of the alternates may ultimately lead without input to end of line. If the instruction
 // is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
 func onePassNext(i *onePassInst, r rune) uint32 {
 	next := i.MatchRunePos(r)
 	if next >= 0 {
 		return i.Next[next]
 	}
 	if i.Op == syntax.InstAltMatch {
 		return i.Out
 	}
 	return 0
 }

 func iop(i *syntax.Inst) syntax.InstOp {
 	op := i.Op
 	switch op {
 	case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
 		op = syntax.InstRune
 	}
 	return op
 }

 // Sparse Array implementation is used as a queueOnePass.
 type queueOnePass struct {
 	sparse          []uint32
 	dense           []uint32
 	size, nextIndex uint32
 }

 func (q *queueOnePass) empty() bool {
 	return q.nextIndex >= q.size
 }

 func (q *queueOnePass) next() (n uint32) {
 	n = q.dense[q.nextIndex]
 	q.nextIndex++
 	return
 }

 func (q *queueOnePass) clear() {
 	q.size = 0
 	q.nextIndex = 0
 }

 func (q *queueOnePass) contains(u uint32) bool {
 	if u >= uint32(len(q.sparse)) {
 		return false
 	}
 	return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
 }

 func (q *queueOnePass) insert(u uint32) {
 	if !q.contains(u) {
 		q.insertNew(u)
 	}
 }

 func (q *queueOnePass) insertNew(u uint32) {
 	if u >= uint32(len(q.sparse)) {
 		return
 	}
 	q.sparse[u] = q.size
 	q.dense[q.size] = u
 	q.size++
 }

 func newQueue(size int) (q *queueOnePass) {
 	return &queueOnePass{
 		sparse: make([]uint32, size),
 		dense:  make([]uint32, size),
 	}
 }

 // mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
 // and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
 // i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
 // NextIp array with the single element mergeFailed is returned.
 // The code assumes that both inputs contain ordered and non-intersecting rune pairs.
 const mergeFailed = uint32(0xffffffff)

 var (
 	noRune = []rune{}
 	noNext = []uint32{mergeFailed}
 )

 func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
 	leftLen := len(*leftRunes)
 	rightLen := len(*rightRunes)
 	if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
 		panic("mergeRuneSets odd length []rune")
 	}
 	var (
 		lx, rx int
 	)
 	merged := make([]rune, 0)
 	next := make([]uint32, 0)
 	ok := true
 	defer func() {
 		if !ok {
 			merged = nil
 			next = nil
 		}
 	}()

 	ix := -1
 	extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
 		if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
 			return false
 		}
 		merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
 		*newLow += 2
 		ix += 2
 		next = append(next, pc)
 		return true
 	}

 	for lx < leftLen || rx < rightLen {
 		switch {
 		case rx >= rightLen:
 			ok = extend(&lx, leftRunes, leftPC)
 		case lx >= leftLen:
 			ok = extend(&rx, rightRunes, rightPC)
 		case (*rightRunes)[rx] < (*leftRunes)[lx]:
 			ok = extend(&rx, rightRunes, rightPC)
 		default:
 			ok = extend(&lx, leftRunes, leftPC)
 		}
 		if !ok {
 			return noRune, noNext
 		}
 	}
 	return merged, next
 }

 // cleanupOnePass drops working memory, and restores certain shortcut instructions.
 func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
 	for ix, instOriginal := range original.Inst {
 		switch instOriginal.Op {
 		case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
 		case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
 			prog.Inst[ix].Next = nil
 		case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
 			prog.Inst[ix].Next = nil
 			prog.Inst[ix] = onePassInst{Inst: instOriginal}
 		}
 	}
 }

 // onePassCopy creates a copy of the original Prog, as we'll be modifying it
 func onePassCopy(prog *syntax.Prog) *onePassProg {
 	p := &onePassProg{
 		Start:  prog.Start,
 		NumCap: prog.NumCap,
 		Inst:   make([]onePassInst, len(prog.Inst)),
 	}
 	for i, inst := range prog.Inst {
 		p.Inst[i] = onePassInst{Inst: inst}
 	}

 	// rewrites one or more common Prog constructs that enable some otherwise
 	// non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at
 	// ip A, that points to ips B & C.
 	// A:BC + B:DA => A:BC + B:CD
 	// A:BC + B:DC => A:DC + B:DC
 	for pc := range p.Inst {
 		switch p.Inst[pc].Op {
 		default:
 			continue
 		case syntax.InstAlt, syntax.InstAltMatch:
 			// A:Bx + B:Ay
 			p_A_Other := &p.Inst[pc].Out
 			p_A_Alt := &p.Inst[pc].Arg
 			// make sure a target is another Alt
 			instAlt := p.Inst[*p_A_Alt]
 			if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
 				p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
 				instAlt = p.Inst[*p_A_Alt]
 				if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
 					continue
 				}
 			}
 			instOther := p.Inst[*p_A_Other]
 			// Analyzing both legs pointing to Alts is for another day
 			if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
 				// too complicated
 				continue
 			}
 			// simple empty transition loop
 			// A:BC + B:DA => A:BC + B:DC
 			p_B_Alt := &p.Inst[*p_A_Alt].Out
 			p_B_Other := &p.Inst[*p_A_Alt].Arg
 			patch := false
 			if instAlt.Out == uint32(pc) {
 				patch = true
 			} else if instAlt.Arg == uint32(pc) {
 				patch = true
 				p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
 			}
 			if patch {
 				*p_B_Alt = *p_A_Other
 			}

 			// empty transition to common target
 			// A:BC + B:DC => A:DC + B:DC
 			if *p_A_Other == *p_B_Alt {
 				*p_A_Alt = *p_B_Other
 			}
 		}
 	}
 	return p
 }

 // runeSlice exists to permit sorting the case-folded rune sets.
 type runeSlice []rune

 func (p runeSlice) Len() int           { return len(p) }
 func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
 func (p runeSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }

 var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
 var anyRune = []rune{0, unicode.MaxRune}

 // makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
 // the match engine can always tell which branch to take. The routine may modify
 // p if it is turned into a onepass Prog. If it isn't possible for this to be a
 // onepass Prog, the Prog nil is returned. makeOnePass is recursive
 // to the size of the Prog.
 func makeOnePass(p *onePassProg) *onePassProg {
 	// If the machine is very long, it's not worth the time to check if we can use one pass.
 	if len(p.Inst) >= 1000 {
 		return nil
 	}

 	var (
 		instQueue    = newQueue(len(p.Inst))
 		visitQueue   = newQueue(len(p.Inst))
 		check        func(uint32, []bool) bool
 		onePassRunes = make([][]rune, len(p.Inst))
 	)

 	// check that paths from Alt instructions are unambiguous, and rebuild the new
 	// program as a onepass program
 	check = func(pc uint32, m []bool) (ok bool) {
 		ok = true
 		inst := &p.Inst[pc]
 		if visitQueue.contains(pc) {
 			return
 		}
 		visitQueue.insert(pc)
 		switch inst.Op {
 		case syntax.InstAlt, syntax.InstAltMatch:
 			ok = check(inst.Out, m) && check(inst.Arg, m)
 			// check no-input paths to InstMatch
 			matchOut := m[inst.Out]
 			matchArg := m[inst.Arg]
 			if matchOut && matchArg {
 				ok = false
 				break
 			}
 			// Match on empty goes in inst.Out
 			if matchArg {
 				inst.Out, inst.Arg = inst.Arg, inst.Out
 				matchOut, matchArg = matchArg, matchOut
 			}
 			if matchOut {
 				m[pc] = true
 				inst.Op = syntax.InstAltMatch
 			}

 			// build a dispatch operator from the two legs of the alt.
 			onePassRunes[pc], inst.Next = mergeRuneSets(
 				&onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
 			if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
 				ok = false
 				break
 			}
 		case syntax.InstCapture, syntax.InstNop:
 			ok = check(inst.Out, m)
 			m[pc] = m[inst.Out]
 			// pass matching runes back through these no-ops.
 			onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
 			inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
 			for i := range inst.Next {
 				inst.Next[i] = inst.Out
 			}
 		case syntax.InstEmptyWidth:
 			ok = check(inst.Out, m)
 			m[pc] = m[inst.Out]
 			onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
 			inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
 			for i := range inst.Next {
 				inst.Next[i] = inst.Out
 			}
 		case syntax.InstMatch, syntax.InstFail:
 			m[pc] = inst.Op == syntax.InstMatch
 		case syntax.InstRune:
 			m[pc] = false
 			if len(inst.Next) > 0 {
 				break
 			}
 			instQueue.insert(inst.Out)
 			if len(inst.Rune) == 0 {
 				onePassRunes[pc] = []rune{}
 				inst.Next = []uint32{inst.Out}
 				break
 			}
 			runes := make([]rune, 0)
 			if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
 				r0 := inst.Rune[0]
 				runes = append(runes, r0, r0)
 				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
 					runes = append(runes, r1, r1)
 				}
 				sort.Sort(runeSlice(runes))
 			} else {
 				runes = append(runes, inst.Rune...)
 			}
 			onePassRunes[pc] = runes
 			inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
 			for i := range inst.Next {
 				inst.Next[i] = inst.Out
 			}
 			inst.Op = syntax.InstRune
 		case syntax.InstRune1:
 			m[pc] = false
 			if len(inst.Next) > 0 {
 				break
 			}
 			instQueue.insert(inst.Out)
 			runes := []rune{}
 			// expand case-folded runes
 			if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
 				r0 := inst.Rune[0]
 				runes = append(runes, r0, r0)
 				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
 					runes = append(runes, r1, r1)
 				}
 				sort.Sort(runeSlice(runes))
 			} else {
 				runes = append(runes, inst.Rune[0], inst.Rune[0])
 			}
 			onePassRunes[pc] = runes
 			inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
 			for i := range inst.Next {
 				inst.Next[i] = inst.Out
 			}
 			inst.Op = syntax.InstRune
 		case syntax.InstRuneAny:
 			m[pc] = false
 			if len(inst.Next) > 0 {
 				break
 			}
 			instQueue.insert(inst.Out)
 			onePassRunes[pc] = append([]rune{}, anyRune...)
 			inst.Next = []uint32{inst.Out}
 		case syntax.InstRuneAnyNotNL:
 			m[pc] = false
 			if len(inst.Next) > 0 {
 				break
 			}
 			instQueue.insert(inst.Out)
 			onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
 			inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
 			for i := range inst.Next {
 				inst.Next[i] = inst.Out
 			}
 		}
 		return
 	}

 	instQueue.clear()
 	instQueue.insert(uint32(p.Start))
 	m := make([]bool, len(p.Inst))
 	for !instQueue.empty() {
 		visitQueue.clear()
 		pc := instQueue.next()
 		if !check(pc, m) {
 			p = nil
 			break
 		}
 	}
 	if p != nil {
 		for i := range p.Inst {
 			p.Inst[i].Rune = onePassRunes[i]
 		}
 	}
 	return p
 }

 // compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
 // can be recharacterized as a one-pass regexp program, or syntax.nil if the
 // Prog cannot be converted. For a one pass prog, the fundamental condition that must
 // be true is: at any InstAlt, there must be no ambiguity about what branch to  take.
 func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
 	if prog.Start == 0 {
 		return nil
 	}
 	// onepass regexp is anchored
 	if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
 		syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
 		return nil
 	}
 	// every instruction leading to InstMatch must be EmptyEndText
 	for _, inst := range prog.Inst {
 		opOut := prog.Inst[inst.Out].Op
 		switch inst.Op {
 		default:
 			if opOut == syntax.InstMatch {
 				return nil
 			}
 		case syntax.InstAlt, syntax.InstAltMatch:
 			if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
 				return nil
 			}
 		case syntax.InstEmptyWidth:
 			if opOut == syntax.InstMatch {
 				if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
 					continue
 				}
 				return nil
 			}
 		}
 	}
 	// Creates a slightly optimized copy of the original Prog
 	// that cleans up some Prog idioms that block valid onepass programs
 	p = onePassCopy(prog)

 	// checkAmbiguity on InstAlts, build onepass Prog if possible
 	p = makeOnePass(p)

 	if p != nil {
 		cleanupOnePass(p, prog)
 	}
 	return p
 }
	// Copyright 2014 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package regexp

	import (
	"regexp/syntax"
	"sort"
	"strings"
	"unicode"
	)

	// "One-pass" regexp execution.
	// Some regexps can be analyzed to determine that they never need
	// backtracking: they are guaranteed to run in one pass over the string
	// without bothering to save all the usual NFA state.
	// Detect those and execute them more quickly.

	// A onePassProg is a compiled one-pass regular expression program.
	// It is the same as syntax.Prog except for the use of onePassInst.
	type onePassProg struct {
	Inst []onePassInst
	Start int // index of start instruction
	NumCap int // number of InstCapture insts in re
	}

	// A onePassInst is a single instruction in a one-pass regular expression program.
	// It is the same as syntax.Inst except for the new 'Next' field.
	type onePassInst struct {
	syntax.Inst
	Next []uint32
	}

	// OnePassPrefix returns a literal string that all matches for the
	// regexp must start with. Complete is true if the prefix
	// is the entire match. Pc is the index of the last rune instruction
	// in the string. The OnePassPrefix skips over the mandatory
	// EmptyBeginText
	func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
	i := &p.Inst[p.Start]
	if i.Op != syntax.InstEmptyWidth \|\| (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
	return "", i.Op == syntax.InstMatch, uint32(p.Start)
	}
	pc = i.Out
	i = &p.Inst[pc]
	for i.Op == syntax.InstNop {
	pc = i.Out
	i = &p.Inst[pc]
	}
	// Avoid allocation of buffer if prefix is empty.
	if iop(i) != syntax.InstRune \|\| len(i.Rune) != 1 {
	return "", i.Op == syntax.InstMatch, uint32(p.Start)
	}

	// Have prefix; gather characters.
	var buf strings.Builder
	for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
	buf.WriteRune(i.Rune[0])
	pc, i = i.Out, &p.Inst[i.Out]
	}
	if i.Op == syntax.InstEmptyWidth &&
	syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
	p.Inst[i.Out].Op == syntax.InstMatch {
	complete = true
	}
	return buf.String(), complete, pc
	}

	// OnePassNext selects the next actionable state of the prog, based on the input character.
	// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine.
	// One of the alternates may ultimately lead without input to end of line. If the instruction
	// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
	func onePassNext(i *onePassInst, r rune) uint32 {
	next := i.MatchRunePos(r)
	if next >= 0 {
	return i.Next[next]
	}
	if i.Op == syntax.InstAltMatch {
	return i.Out
	}
	return 0
	}

	func iop(i *syntax.Inst) syntax.InstOp {
	op := i.Op
	switch op {
	case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
	op = syntax.InstRune
	}
	return op
	}

	// Sparse Array implementation is used as a queueOnePass.
	type queueOnePass struct {
	sparse []uint32
	dense []uint32
	size, nextIndex uint32
	}

	func (q *queueOnePass) empty() bool {
	return q.nextIndex >= q.size
	}

	func (q *queueOnePass) next() (n uint32) {
	n = q.dense[q.nextIndex]
	q.nextIndex++
	return
	}

	func (q *queueOnePass) clear() {
	q.size = 0
	q.nextIndex = 0
	}

	func (q *queueOnePass) contains(u uint32) bool {
	if u >= uint32(len(q.sparse)) {
	return false
	}
	return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
	}

	func (q *queueOnePass) insert(u uint32) {
	if !q.contains(u) {
	q.insertNew(u)
	}
	}

	func (q *queueOnePass) insertNew(u uint32) {
	if u >= uint32(len(q.sparse)) {
	return
	}
	q.sparse[u] = q.size
	q.dense[q.size] = u
	q.size++
	}

	func newQueue(size int) (q *queueOnePass) {
	return &queueOnePass{
	sparse: make([]uint32, size),
	dense: make([]uint32, size),
	}
	}

	// mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
	// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
	// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
	// NextIp array with the single element mergeFailed is returned.
	// The code assumes that both inputs contain ordered and non-intersecting rune pairs.
	const mergeFailed = uint32(0xffffffff)

	var (
	noRune = []rune{}
	noNext = []uint32{mergeFailed}
	)

	func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
	leftLen := len(*leftRunes)
	rightLen := len(*rightRunes)
	if leftLen&0x1 != 0 \|\| rightLen&0x1 != 0 {
	panic("mergeRuneSets odd length []rune")
	}
	var (
	lx, rx int
	)
	merged := make([]rune, 0)
	next := make([]uint32, 0)
	ok := true
	defer func() {
	if !ok {
	merged = nil
	next = nil
	}
	}()

	ix := -1
	extend := func(newLow int, newArray []rune, pc uint32) bool {
	if ix > 0 && (newArray)[newLow] <= merged[ix] {
	return false
	}
	merged = append(merged, (newArray)[newLow], (newArray)[newLow+1])
	*newLow += 2
	ix += 2
	next = append(next, pc)
	return true
	}

	for lx < leftLen \|\| rx < rightLen {
	switch {
	case rx >= rightLen:
	ok = extend(&lx, leftRunes, leftPC)
	case lx >= leftLen:
	ok = extend(&rx, rightRunes, rightPC)
	case (rightRunes)[rx] < (leftRunes)[lx]:
	ok = extend(&rx, rightRunes, rightPC)
	default:
	ok = extend(&lx, leftRunes, leftPC)
	}
	if !ok {
	return noRune, noNext
	}
	}
	return merged, next
	}

	// cleanupOnePass drops working memory, and restores certain shortcut instructions.
	func cleanupOnePass(prog onePassProg, original syntax.Prog) {
	for ix, instOriginal := range original.Inst {
	switch instOriginal.Op {
	case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
	case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
	prog.Inst[ix].Next = nil
	case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
	prog.Inst[ix].Next = nil
	prog.Inst[ix] = onePassInst{Inst: instOriginal}
	}
	}
	}

	// onePassCopy creates a copy of the original Prog, as we'll be modifying it
	func onePassCopy(prog syntax.Prog) onePassProg {
	p := &onePassProg{
	Start: prog.Start,
	NumCap: prog.NumCap,
	Inst: make([]onePassInst, len(prog.Inst)),
	}
	for i, inst := range prog.Inst {
	p.Inst[i] = onePassInst{Inst: inst}
	}

	// rewrites one or more common Prog constructs that enable some otherwise
	// non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at
	// ip A, that points to ips B & C.
	// A:BC + B:DA => A:BC + B:CD
	// A:BC + B:DC => A:DC + B:DC
	for pc := range p.Inst {
	switch p.Inst[pc].Op {
	default:
	continue
	case syntax.InstAlt, syntax.InstAltMatch:
	// A:Bx + B:Ay
	p_A_Other := &p.Inst[pc].Out
	p_A_Alt := &p.Inst[pc].Arg
	// make sure a target is another Alt
	instAlt := p.Inst[*p_A_Alt]
	if !(instAlt.Op == syntax.InstAlt \|\| instAlt.Op == syntax.InstAltMatch) {
	p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
	instAlt = p.Inst[*p_A_Alt]
	if !(instAlt.Op == syntax.InstAlt \|\| instAlt.Op == syntax.InstAltMatch) {
	continue
	}
	}
	instOther := p.Inst[*p_A_Other]
	// Analyzing both legs pointing to Alts is for another day
	if instOther.Op == syntax.InstAlt \|\| instOther.Op == syntax.InstAltMatch {
	// too complicated
	continue
	}
	// simple empty transition loop
	// A:BC + B:DA => A:BC + B:DC
	p_B_Alt := &p.Inst[*p_A_Alt].Out
	p_B_Other := &p.Inst[*p_A_Alt].Arg
	patch := false
	if instAlt.Out == uint32(pc) {
	patch = true
	} else if instAlt.Arg == uint32(pc) {
	patch = true
	p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
	}
	if patch {
	p_B_Alt = p_A_Other
	}

	// empty transition to common target
	// A:BC + B:DC => A:DC + B:DC
	if p_A_Other == p_B_Alt {
	p_A_Alt = p_B_Other
	}
	}
	}
	return p
	}

	// runeSlice exists to permit sorting the case-folded rune sets.
	type runeSlice []rune

	func (p runeSlice) Len() int { return len(p) }
	func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
	func (p runeSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

	var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
	var anyRune = []rune{0, unicode.MaxRune}

	// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
	// the match engine can always tell which branch to take. The routine may modify
	// p if it is turned into a onepass Prog. If it isn't possible for this to be a
	// onepass Prog, the Prog nil is returned. makeOnePass is recursive
	// to the size of the Prog.
	func makeOnePass(p onePassProg) onePassProg {
	// If the machine is very long, it's not worth the time to check if we can use one pass.
	if len(p.Inst) >= 1000 {
	return nil
	}

	var (
	instQueue = newQueue(len(p.Inst))
	visitQueue = newQueue(len(p.Inst))
	check func(uint32, []bool) bool
	onePassRunes = make([][]rune, len(p.Inst))
	)

	// check that paths from Alt instructions are unambiguous, and rebuild the new
	// program as a onepass program
	check = func(pc uint32, m []bool) (ok bool) {
	ok = true
	inst := &p.Inst[pc]
	if visitQueue.contains(pc) {
	return
	}
	visitQueue.insert(pc)
	switch inst.Op {
	case syntax.InstAlt, syntax.InstAltMatch:
	ok = check(inst.Out, m) && check(inst.Arg, m)
	// check no-input paths to InstMatch
	matchOut := m[inst.Out]
	matchArg := m[inst.Arg]
	if matchOut && matchArg {
	ok = false
	break
	}
	// Match on empty goes in inst.Out
	if matchArg {
	inst.Out, inst.Arg = inst.Arg, inst.Out
	matchOut, matchArg = matchArg, matchOut
	}
	if matchOut {
	m[pc] = true
	inst.Op = syntax.InstAltMatch
	}

	// build a dispatch operator from the two legs of the alt.
	onePassRunes[pc], inst.Next = mergeRuneSets(
	&onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
	if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
	ok = false
	break
	}
	case syntax.InstCapture, syntax.InstNop:
	ok = check(inst.Out, m)
	m[pc] = m[inst.Out]
	// pass matching runes back through these no-ops.
	onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
	inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
	for i := range inst.Next {
	inst.Next[i] = inst.Out
	}
	case syntax.InstEmptyWidth:
	ok = check(inst.Out, m)
	m[pc] = m[inst.Out]
	onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
	inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
	for i := range inst.Next {
	inst.Next[i] = inst.Out
	}
	case syntax.InstMatch, syntax.InstFail:
	m[pc] = inst.Op == syntax.InstMatch
	case syntax.InstRune:
	m[pc] = false
	if len(inst.Next) > 0 {
	break
	}
	instQueue.insert(inst.Out)
	if len(inst.Rune) == 0 {
	onePassRunes[pc] = []rune{}
	inst.Next = []uint32{inst.Out}
	break
	}
	runes := make([]rune, 0)
	if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
	r0 := inst.Rune[0]
	runes = append(runes, r0, r0)
	for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
	runes = append(runes, r1, r1)
	}
	sort.Sort(runeSlice(runes))
	} else {
	runes = append(runes, inst.Rune...)
	}
	onePassRunes[pc] = runes
	inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
	for i := range inst.Next {
	inst.Next[i] = inst.Out
	}
	inst.Op = syntax.InstRune
	case syntax.InstRune1:
	m[pc] = false
	if len(inst.Next) > 0 {
	break
	}
	instQueue.insert(inst.Out)
	runes := []rune{}
	// expand case-folded runes
	if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
	r0 := inst.Rune[0]
	runes = append(runes, r0, r0)
	for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
	runes = append(runes, r1, r1)
	}
	sort.Sort(runeSlice(runes))
	} else {
	runes = append(runes, inst.Rune[0], inst.Rune[0])
	}
	onePassRunes[pc] = runes
	inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
	for i := range inst.Next {
	inst.Next[i] = inst.Out
	}
	inst.Op = syntax.InstRune
	case syntax.InstRuneAny:
	m[pc] = false
	if len(inst.Next) > 0 {
	break
	}
	instQueue.insert(inst.Out)
	onePassRunes[pc] = append([]rune{}, anyRune...)
	inst.Next = []uint32{inst.Out}
	case syntax.InstRuneAnyNotNL:
	m[pc] = false
	if len(inst.Next) > 0 {
	break
	}
	instQueue.insert(inst.Out)
	onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
	inst.Next = make([]uint32, len(onePassRunes[pc])/2+1)
	for i := range inst.Next {
	inst.Next[i] = inst.Out
	}
	}
	return
	}

	instQueue.clear()
	instQueue.insert(uint32(p.Start))
	m := make([]bool, len(p.Inst))
	for !instQueue.empty() {
	visitQueue.clear()
	pc := instQueue.next()
	if !check(pc, m) {
	p = nil
	break
	}
	}
	if p != nil {
	for i := range p.Inst {
	p.Inst[i].Rune = onePassRunes[i]
	}
	}
	return p
	}

	// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
	// can be recharacterized as a one-pass regexp program, or syntax.nil if the
	// Prog cannot be converted. For a one pass prog, the fundamental condition that must
	// be true is: at any InstAlt, there must be no ambiguity about what branch to take.
	func compileOnePass(prog syntax.Prog) (p onePassProg) {
	if prog.Start == 0 {
	return nil
	}
	// onepass regexp is anchored
	if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth \|\|
	syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
	return nil
	}
	// every instruction leading to InstMatch must be EmptyEndText
	for _, inst := range prog.Inst {
	opOut := prog.Inst[inst.Out].Op
	switch inst.Op {
	default:
	if opOut == syntax.InstMatch {
	return nil
	}
	case syntax.InstAlt, syntax.InstAltMatch:
	if opOut == syntax.InstMatch \|\| prog.Inst[inst.Arg].Op == syntax.InstMatch {
	return nil
	}
	case syntax.InstEmptyWidth:
	if opOut == syntax.InstMatch {
	if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
	continue
	}
	return nil
	}
	}
	}
	// Creates a slightly optimized copy of the original Prog
	// that cleans up some Prog idioms that block valid onepass programs
	p = onePassCopy(prog)

	// checkAmbiguity on InstAlts, build onepass Prog if possible
	p = makeOnePass(p)

	if p != nil {
	cleanupOnePass(p, prog)
	}
	return p
	}