regexp: hide one-pass code from exported API Update #8112 Hide one-pass regexp API. This means moving the code from regexp/syntax to regexp, but it avoids being locked into the specific API chosen for the implementation. It also removes a slice field from the syntax.Inst, which should avoid bloating the memory footprint of a non-one-pass regexp unnecessarily. LGTM=r R=golang-codereviews, r CC=golang-codereviews, iant https://golang.org/cl/98610046

commit: 6c10e64a906039678f5dffda8b453005ffcdb92e [log] [tgz]
author: Russ Cox <rsc@golang.org> Wed May 28 14:08:44 2014 -0400
committer: Russ Cox <rsc@golang.org> Wed May 28 14:08:44 2014 -0400
tree: 0e7aa14f692e05d57180593035367aaad4a24a29
parent: 0782ee3ad57a21bd3566f20e76e4e453613e7a23 [diff]
diff --git a/src/pkg/regexp/exec.go b/src/pkg/regexp/exec.go
index 22c33cf..c4cb201 100644
--- a/src/pkg/regexp/exec.go
+++ b/src/pkg/regexp/exec.go

@@ -37,7 +37,7 @@
 type machine struct {
 	re       *Regexp      // corresponding Regexp
 	p        *syntax.Prog // compiled program
-	op       *syntax.Prog // compiled onepass program, or syntax.NotOnePass
+	op       *onePassProg // compiled onepass program, or notOnePass
 	q0, q1   queue        // two queues for runq, nextq
 	pool     []*thread    // pool of available threads
 	matched  bool         // whether a match was found
@@ -67,7 +67,7 @@
 }
 
 // progMachine returns a new machine running the prog p.
-func progMachine(p, op *syntax.Prog) *machine {
+func progMachine(p *syntax.Prog, op *onePassProg) *machine {
 	m := &machine{p: p, op: op}
 	n := len(m.p.Inst)
 	m.q0 = queue{make([]uint32, n), make([]entry, 0, n)}
@@ -382,7 +382,7 @@
 			}
 		// peek at the input rune to see which branch of the Alt to take
 		case syntax.InstAlt, syntax.InstAltMatch:
-			pc = int(inst.OnePassNext(r))
+			pc = int(onePassNext(&inst, r))
 			continue
 		case syntax.InstFail:
 			return m.matched
@@ -429,7 +429,7 @@
 	} else {
 		i = m.newInputString(s)
 	}
-	if m.op != syntax.NotOnePass {
+	if m.op != notOnePass {
 		if !m.onepass(i, pos) {
 			re.put(m)
 			return nil

diff --git a/src/pkg/regexp/onepass.go b/src/pkg/regexp/onepass.go
new file mode 100644
index 0000000..501fb28
--- /dev/null
+++ b/src/pkg/regexp/onepass.go

@@ -0,0 +1,582 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+
+package regexp
+
+import (
+	"bytes"
+	"regexp/syntax"
+	"sort"
+	"unicode"
+)
+
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// "One-pass" regexp execution.
+// Some regexps can be analyzed to determine that they never need
+// backtracking: they are guaranteed to run in one pass over the string
+// without bothering to save all the usual NFA state.
+// Detect those and execute them more quickly.
+
+// A onePassProg is a compiled one-pass regular expression program.
+// It is the same as syntax.Prog except for the use of onePassInst.
+type onePassProg struct {
+	Inst   []onePassInst
+	Start  int // index of start instruction
+	NumCap int // number of InstCapture insts in re
+}
+
+// A onePassInst is a single instruction in a one-pass regular expression program.
+// It is the same as syntax.Inst except for the new 'Next' field.
+type onePassInst struct {
+	syntax.Inst
+	Next []uint32
+}
+
+// OnePassPrefix returns a literal string that all matches for the
+// regexp must start with.  Complete is true if the prefix
+// is the entire match. Pc is the index of the last rune instruction
+// in the string. The OnePassPrefix skips over the mandatory
+// EmptyBeginText
+func onePassPrefix(p *syntax.Prog) (prefix string, complete bool, pc uint32) {
+	i := &p.Inst[p.Start]
+	if i.Op != syntax.InstEmptyWidth || (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText == 0 {
+		return "", i.Op == syntax.InstMatch, uint32(p.Start)
+	}
+	pc = i.Out
+	i = &p.Inst[pc]
+	for i.Op == syntax.InstNop {
+		pc = i.Out
+		i = &p.Inst[pc]
+	}
+	// Avoid allocation of buffer if prefix is empty.
+	if iop(i) != syntax.InstRune || len(i.Rune) != 1 {
+		return "", i.Op == syntax.InstMatch, uint32(p.Start)
+	}
+
+	// Have prefix; gather characters.
+	var buf bytes.Buffer
+	for iop(i) == syntax.InstRune && len(i.Rune) == 1 && syntax.Flags(i.Arg)&syntax.FoldCase == 0 {
+		buf.WriteRune(i.Rune[0])
+		pc, i = i.Out, &p.Inst[i.Out]
+	}
+	return buf.String(), i.Op == syntax.InstEmptyWidth && (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText != 0, pc
+}
+
+// OnePassNext selects the next actionable state of the prog, based on the input character.
+// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine.
+// One of the alternates may ultimately lead without input to end of line. If the instruction
+// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
+func onePassNext(i *onePassInst, r rune) uint32 {
+	next := i.MatchRunePos(r)
+	if next >= 0 {
+		return i.Next[next]
+	}
+	if i.Op == syntax.InstAltMatch {
+		return i.Out
+	}
+	return 0
+}
+
+func iop(i *syntax.Inst) syntax.InstOp {
+	op := i.Op
+	switch op {
+	case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
+		op = syntax.InstRune
+	}
+	return op
+}
+
+// Sparse Array implementation is used as a queueOnePass.
+type queueOnePass struct {
+	sparse          []uint32
+	dense           []uint32
+	size, nextIndex uint32
+}
+
+func (q *queueOnePass) empty() bool {
+	return q.nextIndex >= q.size
+}
+
+func (q *queueOnePass) next() (n uint32) {
+	n = q.dense[q.nextIndex]
+	q.nextIndex++
+	return
+}
+
+func (q *queueOnePass) clear() {
+	q.size = 0
+	q.nextIndex = 0
+}
+
+func (q *queueOnePass) reset() {
+	q.nextIndex = 0
+}
+
+func (q *queueOnePass) contains(u uint32) bool {
+	if u >= uint32(len(q.sparse)) {
+		return false
+	}
+	return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
+}
+
+func (q *queueOnePass) insert(u uint32) {
+	if !q.contains(u) {
+		q.insertNew(u)
+	}
+}
+
+func (q *queueOnePass) insertNew(u uint32) {
+	if u >= uint32(len(q.sparse)) {
+		return
+	}
+	q.sparse[u] = q.size
+	q.dense[q.size] = u
+	q.size++
+}
+
+func newQueue(size int) (q *queueOnePass) {
+	return &queueOnePass{
+		sparse: make([]uint32, size),
+		dense:  make([]uint32, size),
+	}
+}
+
+// mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
+// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
+// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
+// NextIp array with the single element mergeFailed is returned.
+// The code assumes that both inputs contain ordered and non-intersecting rune pairs.
+const mergeFailed = uint32(0xffffffff)
+
+var (
+	noRune = []rune{}
+	noNext = []uint32{mergeFailed}
+)
+
+func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
+	leftLen := len(*leftRunes)
+	rightLen := len(*rightRunes)
+	if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
+		panic("mergeRuneSets odd length []rune")
+	}
+	var (
+		lx, rx int
+	)
+	merged := make([]rune, 0)
+	next := make([]uint32, 0)
+	ok := true
+	defer func() {
+		if !ok {
+			merged = nil
+			next = nil
+		}
+	}()
+
+	ix := -1
+	extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
+		if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
+			return false
+		}
+		merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
+		*newLow += 2
+		ix += 2
+		next = append(next, pc)
+		return true
+	}
+
+	for lx < leftLen || rx < rightLen {
+		switch {
+		case rx >= rightLen:
+			ok = extend(&lx, leftRunes, leftPC)
+		case lx >= leftLen:
+			ok = extend(&rx, rightRunes, rightPC)
+		case (*rightRunes)[rx] < (*leftRunes)[lx]:
+			ok = extend(&rx, rightRunes, rightPC)
+		default:
+			ok = extend(&lx, leftRunes, leftPC)
+		}
+		if !ok {
+			return noRune, noNext
+		}
+	}
+	return merged, next
+}
+
+// cleanupOnePass drops working memory, and restores certain shortcut instructions.
+func cleanupOnePass(prog *onePassProg, original *syntax.Prog) {
+	for ix, instOriginal := range original.Inst {
+		switch instOriginal.Op {
+		case syntax.InstAlt, syntax.InstAltMatch, syntax.InstRune:
+		case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop, syntax.InstMatch, syntax.InstFail:
+			prog.Inst[ix].Next = nil
+		case syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
+			prog.Inst[ix].Next = nil
+			prog.Inst[ix] = onePassInst{Inst: instOriginal}
+		}
+	}
+}
+
+// onePassCopy creates a copy of the original Prog, as we'll be modifying it
+func onePassCopy(prog *syntax.Prog) *onePassProg {
+	p := &onePassProg{
+		Start:  prog.Start,
+		NumCap: prog.NumCap,
+	}
+	for _, inst := range prog.Inst {
+		p.Inst = append(p.Inst, onePassInst{Inst: inst})
+	}
+
+	// rewrites one or more common Prog constructs that enable some otherwise
+	// non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at
+	// ip A, that points to ips B & C.
+	// A:BC + B:DA => A:BC + B:CD
+	// A:BC + B:DC => A:DC + B:DC
+	for pc := range p.Inst {
+		switch p.Inst[pc].Op {
+		default:
+			continue
+		case syntax.InstAlt, syntax.InstAltMatch:
+			// A:Bx + B:Ay
+			p_A_Other := &p.Inst[pc].Out
+			p_A_Alt := &p.Inst[pc].Arg
+			// make sure a target is another Alt
+			instAlt := p.Inst[*p_A_Alt]
+			if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
+				p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
+				instAlt = p.Inst[*p_A_Alt]
+				if !(instAlt.Op == syntax.InstAlt || instAlt.Op == syntax.InstAltMatch) {
+					continue
+				}
+			}
+			instOther := p.Inst[*p_A_Other]
+			// Analyzing both legs pointing to Alts is for another day
+			if instOther.Op == syntax.InstAlt || instOther.Op == syntax.InstAltMatch {
+				// too complicated
+				continue
+			}
+			// simple empty transition loop
+			// A:BC + B:DA => A:BC + B:DC
+			p_B_Alt := &p.Inst[*p_A_Alt].Out
+			p_B_Other := &p.Inst[*p_A_Alt].Arg
+			patch := false
+			if instAlt.Out == uint32(pc) {
+				patch = true
+			} else if instAlt.Arg == uint32(pc) {
+				patch = true
+				p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
+			}
+			if patch {
+				*p_B_Alt = *p_A_Other
+			}
+
+			// empty transition to common target
+			// A:BC + B:DC => A:DC + B:DC
+			if *p_A_Other == *p_B_Alt {
+				*p_A_Alt = *p_B_Other
+			}
+		}
+	}
+	return p
+}
+
+// runeSlice exists to permit sorting the case-folded rune sets.
+type runeSlice []rune
+
+func (p runeSlice) Len() int           { return len(p) }
+func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
+func (p runeSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
+
+// Sort is a convenience method.
+func (p runeSlice) Sort() {
+	sort.Sort(p)
+}
+
+var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
+var anyRune = []rune{0, unicode.MaxRune}
+
+// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
+// the match engine can always tell which branch to take. The routine may modify
+// p if it is turned into a onepass Prog. If it isn't possible for this to be a
+// onepass Prog, the Prog notOnePass is returned. makeOnePass is recursive
+// to the size of the Prog.
+func makeOnePass(p *onePassProg) *onePassProg {
+	// If the machine is very long, it's not worth the time to check if we can use one pass.
+	if len(p.Inst) >= 1000 {
+		return notOnePass
+	}
+
+	var (
+		instQueue    = newQueue(len(p.Inst))
+		visitQueue   = newQueue(len(p.Inst))
+		build        func(uint32, *queueOnePass)
+		check        func(uint32, map[uint32]bool) bool
+		onePassRunes = make([][]rune, len(p.Inst))
+	)
+	build = func(pc uint32, q *queueOnePass) {
+		if q.contains(pc) {
+			return
+		}
+		inst := p.Inst[pc]
+		switch inst.Op {
+		case syntax.InstAlt, syntax.InstAltMatch:
+			q.insert(inst.Out)
+			build(inst.Out, q)
+			q.insert(inst.Arg)
+		case syntax.InstMatch, syntax.InstFail:
+		default:
+			q.insert(inst.Out)
+		}
+	}
+
+	// check that paths from Alt instructions are unambiguous, and rebuild the new
+	// program as a onepass program
+	check = func(pc uint32, m map[uint32]bool) (ok bool) {
+		ok = true
+		inst := &p.Inst[pc]
+		if visitQueue.contains(pc) {
+			return
+		}
+		visitQueue.insert(pc)
+		switch inst.Op {
+		case syntax.InstAlt, syntax.InstAltMatch:
+			ok = check(inst.Out, m) && check(inst.Arg, m)
+			// check no-input paths to InstMatch
+			matchOut := m[inst.Out]
+			matchArg := m[inst.Arg]
+			if matchOut && matchArg {
+				ok = false
+				break
+			}
+			// Match on empty goes in inst.Out
+			if matchArg {
+				inst.Out, inst.Arg = inst.Arg, inst.Out
+				matchOut, matchArg = matchArg, matchOut
+			}
+			if matchOut {
+				m[pc] = true
+				inst.Op = syntax.InstAltMatch
+			}
+
+			// build a dispatch operator from the two legs of the alt.
+			onePassRunes[pc], inst.Next = mergeRuneSets(
+				&onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
+			if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
+				ok = false
+				break
+			}
+		case syntax.InstCapture, syntax.InstNop:
+			ok = check(inst.Out, m)
+			m[pc] = m[inst.Out]
+			// pass matching runes back through these no-ops.
+			onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
+			inst.Next = []uint32{}
+			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
+				inst.Next = append(inst.Next, inst.Out)
+			}
+		case syntax.InstEmptyWidth:
+			ok = check(inst.Out, m)
+			m[pc] = m[inst.Out]
+			onePassRunes[pc] = append([]rune{}, onePassRunes[inst.Out]...)
+			inst.Next = []uint32{}
+			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
+				inst.Next = append(inst.Next, inst.Out)
+			}
+		case syntax.InstMatch, syntax.InstFail:
+			m[pc] = inst.Op == syntax.InstMatch
+			break
+		case syntax.InstRune:
+			ok = check(inst.Out, m)
+			m[pc] = false
+			if len(inst.Next) > 0 {
+				break
+			}
+			if len(inst.Rune) == 0 {
+				onePassRunes[pc] = []rune{}
+				inst.Next = []uint32{inst.Out}
+				break
+			}
+			runes := make([]rune, 0)
+			if len(inst.Rune) == 1 && syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
+				r0 := inst.Rune[0]
+				runes = append(runes, r0, r0)
+				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
+					runes = append(runes, r1, r1)
+				}
+				sort.Sort(runeSlice(runes))
+			} else {
+				runes = append(runes, inst.Rune...)
+			}
+			onePassRunes[pc] = runes
+			inst.Next = []uint32{}
+			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
+				inst.Next = append(inst.Next, inst.Out)
+			}
+			inst.Op = syntax.InstRune
+		case syntax.InstRune1:
+			ok = check(inst.Out, m)
+			m[pc] = false
+			if len(inst.Next) > 0 {
+				break
+			}
+			runes := []rune{}
+			// expand case-folded runes
+			if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
+				r0 := inst.Rune[0]
+				runes = append(runes, r0, r0)
+				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
+					runes = append(runes, r1, r1)
+				}
+				sort.Sort(runeSlice(runes))
+			} else {
+				runes = append(runes, inst.Rune[0], inst.Rune[0])
+			}
+			onePassRunes[pc] = runes
+			inst.Next = []uint32{}
+			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
+				inst.Next = append(inst.Next, inst.Out)
+			}
+			inst.Op = syntax.InstRune
+		case syntax.InstRuneAny:
+			ok = check(inst.Out, m)
+			m[pc] = false
+			if len(inst.Next) > 0 {
+				break
+			}
+			onePassRunes[pc] = append([]rune{}, anyRune...)
+			inst.Next = []uint32{inst.Out}
+		case syntax.InstRuneAnyNotNL:
+			ok = check(inst.Out, m)
+			m[pc] = false
+			if len(inst.Next) > 0 {
+				break
+			}
+			onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
+			inst.Next = []uint32{}
+			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
+				inst.Next = append(inst.Next, inst.Out)
+			}
+		}
+		return
+	}
+
+	instQueue.clear()
+	instQueue.insert(uint32(p.Start))
+	m := make(map[uint32]bool, len(p.Inst))
+	for !instQueue.empty() {
+		pc := instQueue.next()
+		inst := p.Inst[pc]
+		visitQueue.clear()
+		if !check(uint32(pc), m) {
+			p = notOnePass
+			break
+		}
+		switch inst.Op {
+		case syntax.InstAlt, syntax.InstAltMatch:
+			instQueue.insert(inst.Out)
+			instQueue.insert(inst.Arg)
+		case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop:
+			instQueue.insert(inst.Out)
+		case syntax.InstMatch:
+		case syntax.InstFail:
+		case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
+		default:
+		}
+	}
+	if p != notOnePass {
+		for i, _ := range p.Inst {
+			p.Inst[i].Rune = onePassRunes[i]
+		}
+	}
+	return p
+}
+
+// walk visits each Inst in the prog once, and applies the argument
+// function(ip, next), in pre-order.
+func walk(prog *syntax.Prog, funcs ...func(ip, next uint32)) {
+	var walk1 func(uint32)
+	progQueue := newQueue(len(prog.Inst))
+	walk1 = func(ip uint32) {
+		if progQueue.contains(ip) {
+			return
+		}
+		progQueue.insert(ip)
+		inst := prog.Inst[ip]
+		switch inst.Op {
+		case syntax.InstAlt, syntax.InstAltMatch:
+			for _, f := range funcs {
+				f(ip, inst.Out)
+				f(ip, inst.Arg)
+			}
+			walk1(inst.Out)
+			walk1(inst.Arg)
+		default:
+			for _, f := range funcs {
+				f(ip, inst.Out)
+			}
+			walk1(inst.Out)
+		}
+	}
+	walk1(uint32(prog.Start))
+}
+
+// find returns the Insts that match the argument predicate function
+func find(prog *syntax.Prog, f func(*syntax.Prog, int) bool) (matches []uint32) {
+	matches = []uint32{}
+
+	for ip := range prog.Inst {
+		if f(prog, ip) {
+			matches = append(matches, uint32(ip))
+		}
+	}
+	return
+}
+
+var notOnePass *onePassProg = nil
+
+// compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
+// can be recharacterized as a one-pass regexp program, or syntax.notOnePass if the
+// Prog cannot be converted. For a one pass prog, the fundamental condition that must
+// be true is: at any InstAlt, there must be no ambiguity about what branch to  take.
+func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
+	if prog.Start == 0 {
+		return notOnePass
+	}
+	// onepass regexp is anchored
+	if prog.Inst[prog.Start].Op != syntax.InstEmptyWidth ||
+		syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
+		return notOnePass
+	}
+	// every instruction leading to InstMatch must be EmptyEndText
+	for _, inst := range prog.Inst {
+		opOut := prog.Inst[inst.Out].Op
+		switch inst.Op {
+		default:
+			if opOut == syntax.InstMatch {
+				return notOnePass
+			}
+		case syntax.InstAlt, syntax.InstAltMatch:
+			if opOut == syntax.InstMatch || prog.Inst[inst.Arg].Op == syntax.InstMatch {
+				return notOnePass
+			}
+		case syntax.InstEmptyWidth:
+			if opOut == syntax.InstMatch {
+				if syntax.EmptyOp(inst.Arg)&syntax.EmptyEndText == syntax.EmptyEndText {
+					continue
+				}
+				return notOnePass
+			}
+		}
+	}
+	// Creates a slightly optimized copy of the original Prog
+	// that cleans up some Prog idioms that block valid onepass programs
+	p = onePassCopy(prog)
+
+	// checkAmbiguity on InstAlts, build onepass Prog if possible
+	p = makeOnePass(p)
+
+	if p != notOnePass {
+		cleanupOnePass(p, prog)
+	}
+	return p
+}

diff --git a/src/pkg/regexp/onepass_test.go b/src/pkg/regexp/onepass_test.go
new file mode 100644
index 0000000..7b2beea
--- /dev/null
+++ b/src/pkg/regexp/onepass_test.go

@@ -0,0 +1,208 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package regexp
+
+import (
+	"reflect"
+	"regexp/syntax"
+	"testing"
+)
+
+var runeMergeTests = []struct {
+	left, right, merged []rune
+	next                []uint32
+	leftPC, rightPC     uint32
+}{
+	{
+		// empty rhs
+		[]rune{69, 69},
+		[]rune{},
+		[]rune{69, 69},
+		[]uint32{1},
+		1, 2,
+	},
+	{
+		// identical runes, identical targets
+		[]rune{69, 69},
+		[]rune{69, 69},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 1,
+	},
+	{
+		// identical runes, different targets
+		[]rune{69, 69},
+		[]rune{69, 69},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// append right-first
+		[]rune{69, 69},
+		[]rune{71, 71},
+		[]rune{69, 69, 71, 71},
+		[]uint32{1, 2},
+		1, 2,
+	},
+	{
+		// append, left-first
+		[]rune{71, 71},
+		[]rune{69, 69},
+		[]rune{69, 69, 71, 71},
+		[]uint32{2, 1},
+		1, 2,
+	},
+	{
+		// successful interleave
+		[]rune{60, 60, 71, 71, 101, 101},
+		[]rune{69, 69, 88, 88},
+		[]rune{60, 60, 69, 69, 71, 71, 88, 88, 101, 101},
+		[]uint32{1, 2, 1, 2, 1},
+		1, 2,
+	},
+	{
+		// left surrounds right
+		[]rune{69, 74},
+		[]rune{71, 71},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// right surrounds left
+		[]rune{69, 74},
+		[]rune{68, 75},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// overlap at interval begin
+		[]rune{69, 74},
+		[]rune{74, 75},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// overlap ar interval end
+		[]rune{69, 74},
+		[]rune{65, 69},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// overlap from above
+		[]rune{69, 74},
+		[]rune{71, 74},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// overlap from below
+		[]rune{69, 74},
+		[]rune{65, 71},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+	{
+		// out of order []rune
+		[]rune{69, 74, 60, 65},
+		[]rune{66, 67},
+		[]rune{},
+		[]uint32{mergeFailed},
+		1, 2,
+	},
+}
+
+func TestMergeRuneSet(t *testing.T) {
+	for ix, test := range runeMergeTests {
+		merged, next := mergeRuneSets(&test.left, &test.right, test.leftPC, test.rightPC)
+		if !reflect.DeepEqual(merged, test.merged) {
+			t.Errorf("mergeRuneSet :%d (%v, %v) merged\n have\n%v\nwant\n%v", ix, test.left, test.right, merged, test.merged)
+		}
+		if !reflect.DeepEqual(next, test.next) {
+			t.Errorf("mergeRuneSet :%d(%v, %v) next\n have\n%v\nwant\n%v", ix, test.left, test.right, next, test.next)
+		}
+	}
+}
+
+const noStr = `!`
+
+var onePass = &onePassProg{}
+
+var onePassTests = []struct {
+	re      string
+	onePass *onePassProg
+	prog    string
+}{
+	{`^(?:a|(?:a*))$`, notOnePass, noStr},
+	{`^(?:(a)|(?:a*))$`, notOnePass, noStr},
+	{`^(?:(?:(?:.(?:$))?))$`, onePass, `a`},
+	{`^abcd$`, onePass, `abcd`},
+	{`^abcd$`, onePass, `abcde`},
+	{`^(?:(?:a{0,})*?)$`, onePass, `a`},
+	{`^(?:(?:a+)*)$`, onePass, ``},
+	{`^(?:(?:a|(?:aa)))$`, onePass, ``},
+	{`^(?:[^\s\S])$`, onePass, ``},
+	{`^(?:(?:a{3,4}){0,})$`, notOnePass, `aaaaaa`},
+	{`^(?:(?:a+)*)$`, onePass, `a`},
+	{`^(?:(?:(?:a*)+))$`, onePass, noStr},
+	{`^(?:(?:a+)*)$`, onePass, ``},
+	{`^[a-c]+$`, onePass, `abc`},
+	{`^[a-c]*$`, onePass, `abcdabc`},
+	{`^(?:a*)$`, onePass, `aaaaaaa`},
+	{`^(?:(?:aa)|a)$`, onePass, `a`},
+	{`^[a-c]*`, notOnePass, `abcdabc`},
+	{`^[a-c]*$`, onePass, `abc`},
+	{`^...$`, onePass, ``},
+	{`^(?:a|(?:aa))$`, onePass, `a`},
+	{`^[a-c]*`, notOnePass, `abcabc`},
+	{`^a((b))c$`, onePass, noStr},
+	{`^a.[l-nA-Cg-j]?e$`, onePass, noStr},
+	{`^a((b))$`, onePass, noStr},
+	{`^a(?:(b)|(c))c$`, onePass, noStr},
+	{`^a(?:(b*)|(c))c$`, notOnePass, noStr},
+	{`^a(?:b|c)$`, onePass, noStr},
+	{`^a(?:b?|c)$`, onePass, noStr},
+	{`^a(?:b?|c?)$`, notOnePass, noStr},
+	{`^a(?:b?|c+)$`, onePass, noStr},
+	{`^a(?:b+|(bc))d$`, notOnePass, noStr},
+	{`^a(?:bc)+$`, onePass, noStr},
+	{`^a(?:[bcd])+$`, onePass, noStr},
+	{`^a((?:[bcd])+)$`, onePass, noStr},
+	{`^a(:?b|c)*d$`, onePass, `abbbccbbcbbd"`},
+	{`^.bc(d|e)*$`, onePass, `abcddddddeeeededd`},
+	{`^(?:(?:aa)|.)$`, notOnePass, `a`},
+	{`^(?:(?:a{1,2}){1,2})$`, notOnePass, `aaaa`},
+}
+
+func TestCompileOnePass(t *testing.T) {
+	var (
+		p   *syntax.Prog
+		re  *syntax.Regexp
+		err error
+	)
+	for _, test := range onePassTests {
+		if re, err = syntax.Parse(test.re, syntax.Perl); err != nil {
+			t.Errorf("Parse(%q) got err:%s, want success", test.re, err)
+			continue
+		}
+		// needs to be done before compile...
+		re = re.Simplify()
+		if p, err = syntax.Compile(re); err != nil {
+			t.Errorf("Compile(%q) got err:%s, want success", test.re, err)
+			continue
+		}
+		onePass = compileOnePass(p)
+		if (onePass == notOnePass) != (test.onePass == notOnePass) {
+			t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass)
+		}
+	}
+}

diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index cca21e7..0b8336a 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go

@@ -83,7 +83,7 @@
 	// read-only after Compile
 	expr           string         // as passed to Compile
 	prog           *syntax.Prog   // compiled program
-	onepass        *syntax.Prog   // onpass program or nil
+	onepass        *onePassProg   // onpass program or nil
 	prefix         string         // required prefix in unanchored matches
 	prefixBytes    []byte         // prefix, as a []byte
 	prefixComplete bool           // prefix is the entire regexp
@@ -165,16 +165,16 @@
 	regexp := &Regexp{
 		expr:        expr,
 		prog:        prog,
-		onepass:     prog.CompileOnePass(),
+		onepass:     compileOnePass(prog),
 		numSubexp:   maxCap,
 		subexpNames: capNames,
 		cond:        prog.StartCond(),
 		longest:     longest,
 	}
-	if regexp.onepass == syntax.NotOnePass {
+	if regexp.onepass == notOnePass {
 		regexp.prefix, regexp.prefixComplete = prog.Prefix()
 	} else {
-		regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = prog.OnePassPrefix()
+		regexp.prefix, regexp.prefixComplete, regexp.prefixEnd = onePassPrefix(prog)
 	}
 	if regexp.prefix != "" {
 		// TODO(rsc): Remove this allocation by adding

diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go
index c1d6a12..29bd282 100644
--- a/src/pkg/regexp/syntax/prog.go
+++ b/src/pkg/regexp/syntax/prog.go

@@ -6,7 +6,6 @@
 
 import (
 	"bytes"
-	"sort"
 	"strconv"
 	"unicode"
 )
@@ -115,7 +114,6 @@
 	Out  uint32 // all but InstMatch, InstFail
 	Arg  uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
 	Rune []rune
-	Next []uint32 // If input rune matches
 }
 
 func (p *Prog) String() string {
@@ -165,36 +163,6 @@
 	return buf.String(), i.Op == InstMatch
 }
 
-// OnePassPrefix returns a literal string that all matches for the
-// regexp must start with.  Complete is true if the prefix
-// is the entire match. Pc is the index of the last rune instruction
-// in the string. The OnePassPrefix skips over the mandatory
-// EmptyBeginText
-func (p *Prog) OnePassPrefix() (prefix string, complete bool, pc uint32) {
-	i := &p.Inst[p.Start]
-	if i.Op != InstEmptyWidth || (EmptyOp(i.Arg))&EmptyBeginText == 0 {
-		return "", i.Op == InstMatch, uint32(p.Start)
-	}
-	pc = i.Out
-	i = &p.Inst[pc]
-	for i.Op == InstNop {
-		pc = i.Out
-		i = &p.Inst[pc]
-	}
-	// Avoid allocation of buffer if prefix is empty.
-	if i.op() != InstRune || len(i.Rune) != 1 {
-		return "", i.Op == InstMatch, uint32(p.Start)
-	}
-
-	// Have prefix; gather characters.
-	var buf bytes.Buffer
-	for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 {
-		buf.WriteRune(i.Rune[0])
-		pc, i = i.Out, &p.Inst[i.Out]
-	}
-	return buf.String(), i.Op == InstEmptyWidth && (EmptyOp(i.Arg))&EmptyBeginText != 0, pc
-}
-
 // StartCond returns the leading empty-width conditions that must
 // be true in any match.  It returns ^EmptyOp(0) if no matches are possible.
 func (p *Prog) StartCond() EmptyOp {
@@ -221,29 +189,17 @@
 
 const noMatch = -1
 
-// OnePassNext selects the next actionable state of the prog, based on the input character.
-// It should only be called when i.Op == InstAlt or InstAltMatch, and from the one-pass machine.
-// One of the alternates may ultimately lead without input to end of line. If the instruction
-// is InstAltMatch the path to the InstMatch is in i.Out, the normal node in i.Next.
-func (i *Inst) OnePassNext(r rune) uint32 {
-	next := i.MatchRunePos(r)
-	if next != noMatch {
-		return i.Next[next]
-	}
-	if i.Op == InstAltMatch {
-		return i.Out
-	}
-	return 0
-}
-
 // MatchRune returns true if the instruction matches (and consumes) r.
 // It should only be called when i.Op == InstRune.
 func (i *Inst) MatchRune(r rune) bool {
 	return i.MatchRunePos(r) != noMatch
 }
 
-// MatchRunePos returns the index of the rune pair if the instruction matches.
-// It should only be called when i.Op == InstRune.
+// MatchRunePos checks whether the instruction matches (and consumes) r.
+// If so, MatchRunePos returns the index of the matching rune pair
+// (or, when len(i.Rune) == 1, rune singleton).
+// If not, MatchRunePos returns -1.
+// MatchRunePos should only be called when i.Op == InstRune.
 func (i *Inst) MatchRunePos(r rune) int {
 	rune := i.Rune
 
@@ -387,496 +343,3 @@
 		bw(b, "anynotnl -> ", u32(i.Out))
 	}
 }
-
-// Sparse Array implementation is used as a queue.
-type queue struct {
-	sparse          []uint32
-	dense           []uint32
-	size, nextIndex uint32
-}
-
-func (q *queue) empty() bool {
-	return q.nextIndex >= q.size
-}
-
-func (q *queue) next() (n uint32) {
-	n = q.dense[q.nextIndex]
-	q.nextIndex++
-	return
-}
-
-func (q *queue) clear() {
-	q.size = 0
-	q.nextIndex = 0
-}
-
-func (q *queue) reset() {
-	q.nextIndex = 0
-}
-
-func (q *queue) contains(u uint32) bool {
-	if u >= uint32(len(q.sparse)) {
-		return false
-	}
-	return q.sparse[u] < q.size && q.dense[q.sparse[u]] == u
-}
-
-func (q *queue) insert(u uint32) {
-	if !q.contains(u) {
-		q.insertNew(u)
-	}
-}
-
-func (q *queue) insertNew(u uint32) {
-	if u >= uint32(len(q.sparse)) {
-		return
-	}
-	q.sparse[u] = q.size
-	q.dense[q.size] = u
-	q.size++
-}
-
-func newQueue(size int) (q *queue) {
-	return &queue{
-		sparse: make([]uint32, size),
-		dense:  make([]uint32, size),
-	}
-}
-
-// mergeRuneSets merges two non-intersecting runesets, and returns the merged result,
-// and a NextIp array. The idea is that if a rune matches the OnePassRunes at index
-// i, NextIp[i/2] is the target. If the input sets intersect, an empty runeset and a
-// NextIp array with the single element mergeFailed is returned.
-// The code assumes that both inputs contain ordered and non-intersecting rune pairs.
-const mergeFailed = uint32(0xffffffff)
-
-var (
-	noRune = []rune{}
-	noNext = []uint32{mergeFailed}
-)
-
-func mergeRuneSets(leftRunes, rightRunes *[]rune, leftPC, rightPC uint32) ([]rune, []uint32) {
-	leftLen := len(*leftRunes)
-	rightLen := len(*rightRunes)
-	if leftLen&0x1 != 0 || rightLen&0x1 != 0 {
-		panic("mergeRuneSets odd length []rune")
-	}
-	var (
-		lx, rx int
-	)
-	merged := make([]rune, 0)
-	next := make([]uint32, 0)
-	ok := true
-	defer func() {
-		if !ok {
-			merged = nil
-			next = nil
-		}
-	}()
-
-	ix := -1
-	extend := func(newLow *int, newArray *[]rune, pc uint32) bool {
-		if ix > 0 && (*newArray)[*newLow] <= merged[ix] {
-			return false
-		}
-		merged = append(merged, (*newArray)[*newLow], (*newArray)[*newLow+1])
-		*newLow += 2
-		ix += 2
-		next = append(next, pc)
-		return true
-	}
-
-	for lx < leftLen || rx < rightLen {
-		switch {
-		case rx >= rightLen:
-			ok = extend(&lx, leftRunes, leftPC)
-		case lx >= leftLen:
-			ok = extend(&rx, rightRunes, rightPC)
-		case (*rightRunes)[rx] < (*leftRunes)[lx]:
-			ok = extend(&rx, rightRunes, rightPC)
-		default:
-			ok = extend(&lx, leftRunes, leftPC)
-		}
-		if !ok {
-			return noRune, noNext
-		}
-	}
-	return merged, next
-}
-
-// cleanupOnePass drops working memory, and restores certain shortcut instructions.
-func (prog *Prog) cleanupOnePass(pOriginal *Prog) {
-	for ix, instOriginal := range pOriginal.Inst {
-		switch instOriginal.Op {
-		case InstAlt, InstAltMatch, InstRune:
-		case InstCapture, InstEmptyWidth, InstNop, InstMatch, InstFail:
-			prog.Inst[ix].Next = nil
-		case InstRune1, InstRuneAny, InstRuneAnyNotNL:
-			prog.Inst[ix].Next = nil
-			prog.Inst[ix] = instOriginal
-		}
-	}
-}
-
-// onePassCopy creates a copy of the original Prog, as we'll be modifying it
-func (prog *Prog) onePassCopy() *Prog {
-	p := &Prog{
-		Inst:   append([]Inst{}[:], prog.Inst...),
-		Start:  prog.Start,
-		NumCap: prog.NumCap,
-	}
-	for _, inst := range p.Inst {
-		inst.Next = make([]uint32, 0)
-	}
-
-	// rewrites one or more common Prog constructs that enable some otherwise
-	// non-onepass Progs to be onepass. A:BD (for example) means an InstAlt at
-	// ip A, that points to ips B & C.
-	// A:BC + B:DA => A:BC + B:CD
-	// A:BC + B:DC => A:DC + B:DC
-	for pc := range p.Inst {
-		switch p.Inst[pc].Op {
-		default:
-			continue
-		case InstAlt, InstAltMatch:
-			// A:Bx + B:Ay
-			p_A_Other := &p.Inst[pc].Out
-			p_A_Alt := &p.Inst[pc].Arg
-			// make sure a target is another Alt
-			instAlt := p.Inst[*p_A_Alt]
-			if !(instAlt.Op == InstAlt || instAlt.Op == InstAltMatch) {
-				p_A_Alt, p_A_Other = p_A_Other, p_A_Alt
-				instAlt = p.Inst[*p_A_Alt]
-				if !(instAlt.Op == InstAlt || instAlt.Op == InstAltMatch) {
-					continue
-				}
-			}
-			instOther := p.Inst[*p_A_Other]
-			// Analyzing both legs pointing to Alts is for another day
-			if instOther.Op == InstAlt || instOther.Op == InstAltMatch {
-				// too complicated
-				continue
-			}
-			// simple empty transition loop
-			// A:BC + B:DA => A:BC + B:DC
-			p_B_Alt := &p.Inst[*p_A_Alt].Out
-			p_B_Other := &p.Inst[*p_A_Alt].Arg
-			patch := false
-			if instAlt.Out == uint32(pc) {
-				patch = true
-			} else if instAlt.Arg == uint32(pc) {
-				patch = true
-				p_B_Alt, p_B_Other = p_B_Other, p_B_Alt
-			}
-			if patch {
-				*p_B_Alt = *p_A_Other
-			}
-
-			// empty transition to common target
-			// A:BC + B:DC => A:DC + B:DC
-			if *p_A_Other == *p_B_Alt {
-				*p_A_Alt = *p_B_Other
-			}
-		}
-	}
-	return p
-}
-
-// runeSlice exists to permit sorting the case-folded rune sets.
-type runeSlice []rune
-
-func (p runeSlice) Len() int           { return len(p) }
-func (p runeSlice) Less(i, j int) bool { return p[i] < p[j] }
-func (p runeSlice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }
-
-// Sort is a convenience method.
-func (p runeSlice) Sort() {
-	sort.Sort(p)
-}
-
-// makeOnePass creates a onepass Prog, if possible. It is possible if at any alt,
-// the match engine can always tell which branch to take. The routine may modify
-// p if it is turned into a onepass Prog. If it isn't possible for this to be a
-// onepass Prog, the Prog syntax.NotOnePass is returned. makeOnePass is recursive
-// to the size of the Prog
-func (p *Prog) makeOnePass() *Prog {
-	// If the machine is very long, it's not worth the time to check if we can use one pass.
-	if len(p.Inst) >= 1000 {
-		return NotOnePass
-	}
-
-	var (
-		instQueue    = newQueue(len(p.Inst))
-		visitQueue   = newQueue(len(p.Inst))
-		build        func(uint32, *queue)
-		check        func(uint32, map[uint32]bool) bool
-		onePassRunes = make([][]rune, len(p.Inst))
-	)
-	build = func(pc uint32, q *queue) {
-		if q.contains(pc) {
-			return
-		}
-		inst := p.Inst[pc]
-		switch inst.Op {
-		case InstAlt, InstAltMatch:
-			q.insert(inst.Out)
-			build(inst.Out, q)
-			q.insert(inst.Arg)
-		case InstMatch, InstFail:
-		default:
-			q.insert(inst.Out)
-		}
-	}
-
-	// check that paths from Alt instructions are unambiguous, and rebuild the new
-	// program as a onepass program
-	check = func(pc uint32, m map[uint32]bool) (ok bool) {
-		ok = true
-		inst := &p.Inst[pc]
-		if visitQueue.contains(pc) {
-			return
-		}
-		visitQueue.insert(pc)
-		switch inst.Op {
-		case InstAlt, InstAltMatch:
-			ok = check(inst.Out, m) && check(inst.Arg, m)
-			// check no-input paths to InstMatch
-			matchOut := m[inst.Out]
-			matchArg := m[inst.Arg]
-			if matchOut && matchArg {
-				ok = false
-				break
-			}
-			// Match on empty goes in inst.Out
-			if matchArg {
-				inst.Out, inst.Arg = inst.Arg, inst.Out
-				matchOut, matchArg = matchArg, matchOut
-			}
-			if matchOut {
-				m[pc] = true
-				inst.Op = InstAltMatch
-			}
-
-			// build a dispatch operator from the two legs of the alt.
-			onePassRunes[pc], inst.Next = mergeRuneSets(
-				&onePassRunes[inst.Out], &onePassRunes[inst.Arg], inst.Out, inst.Arg)
-			if len(inst.Next) > 0 && inst.Next[0] == mergeFailed {
-				ok = false
-				break
-			}
-		case InstCapture, InstNop:
-			ok = check(inst.Out, m)
-			m[pc] = m[inst.Out]
-			// pass matching runes back through these no-ops.
-			onePassRunes[pc] = append([]rune{}[:], onePassRunes[inst.Out][:]...)
-			inst.Next = []uint32{}
-			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
-				inst.Next = append(inst.Next, inst.Out)
-			}
-		case InstEmptyWidth:
-			ok = check(inst.Out, m)
-			m[pc] = m[inst.Out]
-			onePassRunes[pc] = append([]rune{}[:], onePassRunes[inst.Out][:]...)
-			inst.Next = []uint32{}
-			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
-				inst.Next = append(inst.Next, inst.Out)
-			}
-		case InstMatch, InstFail:
-			m[pc] = inst.Op == InstMatch
-			break
-		case InstRune:
-			ok = check(inst.Out, m)
-			m[pc] = false
-			if len(inst.Next) > 0 {
-				break
-			}
-			if len(inst.Rune) == 0 {
-				onePassRunes[pc] = []rune{}[:]
-				inst.Next = []uint32{inst.Out}
-				break
-			}
-			runes := make([]rune, 0)
-			if len(inst.Rune) == 1 && Flags(inst.Arg)&FoldCase != 0 {
-				r0 := inst.Rune[0]
-				runes = append(runes, r0, r0)
-				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
-					runes = append(runes, r1, r1)
-				}
-				sort.Sort(runeSlice(runes))
-			} else {
-				runes = append(runes, inst.Rune...)
-			}
-			onePassRunes[pc] = runes
-			inst.Next = []uint32{}
-			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
-				inst.Next = append(inst.Next, inst.Out)
-			}
-			inst.Op = InstRune
-		case InstRune1:
-			ok = check(inst.Out, m)
-			m[pc] = false
-			if len(inst.Next) > 0 {
-				break
-			}
-			runes := []rune{}[:]
-			// expand case-folded runes
-			if Flags(inst.Arg)&FoldCase != 0 {
-				r0 := inst.Rune[0]
-				runes = append(runes, r0, r0)
-				for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) {
-					runes = append(runes, r1, r1)
-				}
-				sort.Sort(runeSlice(runes))
-			} else {
-				runes = append(runes, inst.Rune[0], inst.Rune[0])
-			}
-			onePassRunes[pc] = runes
-			inst.Next = []uint32{}
-			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
-				inst.Next = append(inst.Next, inst.Out)
-			}
-			inst.Op = InstRune
-		case InstRuneAny:
-			ok = check(inst.Out, m)
-			m[pc] = false
-			if len(inst.Next) > 0 {
-				break
-			}
-			onePassRunes[pc] = append([]rune{}[:], anyRune[:]...)
-			inst.Next = []uint32{inst.Out}
-		case InstRuneAnyNotNL:
-			ok = check(inst.Out, m)
-			m[pc] = false
-			if len(inst.Next) > 0 {
-				break
-			}
-			onePassRunes[pc] = append([]rune{}[:], anyRuneNotNL[:]...)
-			inst.Next = []uint32{}
-			for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
-				inst.Next = append(inst.Next, inst.Out)
-			}
-		}
-		return
-	}
-
-	instQueue.clear()
-	instQueue.insert(uint32(p.Start))
-	m := make(map[uint32]bool, len(p.Inst))
-	for !instQueue.empty() {
-		pc := instQueue.next()
-		inst := p.Inst[pc]
-		visitQueue.clear()
-		if !check(uint32(pc), m) {
-			p = NotOnePass
-			break
-		}
-		switch inst.Op {
-		case InstAlt, InstAltMatch:
-			instQueue.insert(inst.Out)
-			instQueue.insert(inst.Arg)
-		case InstCapture, InstEmptyWidth, InstNop:
-			instQueue.insert(inst.Out)
-		case InstMatch:
-		case InstFail:
-		case InstRune, InstRune1, InstRuneAny, InstRuneAnyNotNL:
-		default:
-		}
-	}
-	if p != NotOnePass {
-		for i, _ := range p.Inst {
-			p.Inst[i].Rune = onePassRunes[i][:]
-		}
-	}
-	return p
-}
-
-// walk visits each Inst in the prog once, and applies the argument
-// function(ip, next), in pre-order.
-func (prog *Prog) walk(funcs ...func(ip, next uint32)) {
-	var walk1 func(uint32)
-	progQueue := newQueue(len(prog.Inst))
-	walk1 = func(ip uint32) {
-		if progQueue.contains(ip) {
-			return
-		}
-		progQueue.insert(ip)
-		inst := prog.Inst[ip]
-		switch inst.Op {
-		case InstAlt, InstAltMatch:
-			for _, f := range funcs {
-				f(ip, inst.Out)
-				f(ip, inst.Arg)
-			}
-			walk1(inst.Out)
-			walk1(inst.Arg)
-		default:
-			for _, f := range funcs {
-				f(ip, inst.Out)
-			}
-			walk1(inst.Out)
-		}
-	}
-	walk1(uint32(prog.Start))
-}
-
-// find returns the Insts that match the argument predicate function
-func (prog *Prog) find(f func(*Prog, int) bool) (matches []uint32) {
-	matches = []uint32{}
-
-	for ip := range prog.Inst {
-		if f(prog, ip) {
-			matches = append(matches, uint32(ip))
-		}
-	}
-	return
-}
-
-var NotOnePass *Prog = nil
-var debug = false
-
-// CompileOnePass returns a new *Prog suitable for onePass execution if the original Prog
-// can be recharacterized as a one-pass regexp program, or syntax.NotOnePass if the
-// Prog cannot be converted. For a one pass prog, the fundamental condition that must
-// be true is: at any InstAlt, there must be no ambiguity about what branch to  take.
-func (prog *Prog) CompileOnePass() (p *Prog) {
-	if prog.Start == 0 {
-		return NotOnePass
-	}
-	// onepass regexp is anchored
-	if prog.Inst[prog.Start].Op != InstEmptyWidth ||
-		EmptyOp(prog.Inst[prog.Start].Arg)&EmptyBeginText != EmptyBeginText {
-		return NotOnePass
-	}
-	// every instruction leading to InstMatch must be EmptyEndText
-	for _, inst := range prog.Inst {
-		opOut := prog.Inst[inst.Out].Op
-		switch inst.Op {
-		default:
-			if opOut == InstMatch {
-				return NotOnePass
-			}
-		case InstAlt, InstAltMatch:
-			if opOut == InstMatch || prog.Inst[inst.Arg].Op == InstMatch {
-				return NotOnePass
-			}
-		case InstEmptyWidth:
-			if opOut == InstMatch {
-				if EmptyOp(inst.Arg)&EmptyEndText == EmptyEndText {
-					continue
-				}
-				return NotOnePass
-			}
-		}
-	}
-	// Creates a slightly optimized copy of the original Prog
-	// that cleans up some Prog idioms that block valid onepass programs
-	p = prog.onePassCopy()
-
-	// checkAmbiguity on InstAlts, build onepass Prog if possible
-	p = p.makeOnePass()
-
-	if p != NotOnePass {
-		p.cleanupOnePass(prog)
-	}
-	return p
-}

diff --git a/src/pkg/regexp/syntax/prog_test.go b/src/pkg/regexp/syntax/prog_test.go
index 66beb74..50bfa3d 100644
--- a/src/pkg/regexp/syntax/prog_test.go
+++ b/src/pkg/regexp/syntax/prog_test.go

@@ -4,10 +4,7 @@
 
 package syntax
 
-import (
-	"reflect"
-	"testing"
-)
+import "testing"
 
 var compileTests = []struct {
 	Regexp string
@@ -115,200 +112,3 @@
 		EmptyOpContext(r1, -1)
 	}
 }
-
-var runeMergeTests = []struct {
-	left, right, merged []rune
-	next                []uint32
-	leftPC, rightPC     uint32
-}{
-	{
-		// empty rhs
-		[]rune{69, 69},
-		[]rune{},
-		[]rune{69, 69},
-		[]uint32{1},
-		1, 2,
-	},
-	{
-		// identical runes, identical targets
-		[]rune{69, 69},
-		[]rune{69, 69},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 1,
-	},
-	{
-		// identical runes, different targets
-		[]rune{69, 69},
-		[]rune{69, 69},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// append right-first
-		[]rune{69, 69},
-		[]rune{71, 71},
-		[]rune{69, 69, 71, 71},
-		[]uint32{1, 2},
-		1, 2,
-	},
-	{
-		// append, left-first
-		[]rune{71, 71},
-		[]rune{69, 69},
-		[]rune{69, 69, 71, 71},
-		[]uint32{2, 1},
-		1, 2,
-	},
-	{
-		// successful interleave
-		[]rune{60, 60, 71, 71, 101, 101},
-		[]rune{69, 69, 88, 88},
-		[]rune{60, 60, 69, 69, 71, 71, 88, 88, 101, 101},
-		[]uint32{1, 2, 1, 2, 1},
-		1, 2,
-	},
-	{
-		// left surrounds right
-		[]rune{69, 74},
-		[]rune{71, 71},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// right surrounds left
-		[]rune{69, 74},
-		[]rune{68, 75},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// overlap at interval begin
-		[]rune{69, 74},
-		[]rune{74, 75},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// overlap ar interval end
-		[]rune{69, 74},
-		[]rune{65, 69},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// overlap from above
-		[]rune{69, 74},
-		[]rune{71, 74},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// overlap from below
-		[]rune{69, 74},
-		[]rune{65, 71},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-	{
-		// out of order []rune
-		[]rune{69, 74, 60, 65},
-		[]rune{66, 67},
-		[]rune{},
-		[]uint32{mergeFailed},
-		1, 2,
-	},
-}
-
-func TestMergeRuneSet(t *testing.T) {
-	for ix, test := range runeMergeTests {
-		merged, next := mergeRuneSets(&test.left, &test.right, test.leftPC, test.rightPC)
-		if !reflect.DeepEqual(merged, test.merged) {
-			t.Errorf("mergeRuneSet :%d (%v, %v) merged\n have\n%v\nwant\n%v", ix, test.left, test.right, merged, test.merged)
-		}
-		if !reflect.DeepEqual(next, test.next) {
-			t.Errorf("mergeRuneSet :%d(%v, %v) next\n have\n%v\nwant\n%v", ix, test.left, test.right, next, test.next)
-		}
-	}
-}
-
-const noStr = `!`
-
-var onePass = &Prog{}
-
-var onePassTests = []struct {
-	re      string
-	onePass *Prog
-	prog    string
-}{
-	{`^(?:a|(?:a*))$`, NotOnePass, noStr},
-	{`^(?:(a)|(?:a*))$`, NotOnePass, noStr},
-	{`^(?:(?:(?:.(?:$))?))$`, onePass, `a`},
-	{`^abcd$`, onePass, `abcd`},
-	{`^abcd$`, onePass, `abcde`},
-	{`^(?:(?:a{0,})*?)$`, onePass, `a`},
-	{`^(?:(?:a+)*)$`, onePass, ``},
-	{`^(?:(?:a|(?:aa)))$`, onePass, ``},
-	{`^(?:[^\s\S])$`, onePass, ``},
-	{`^(?:(?:a{3,4}){0,})$`, NotOnePass, `aaaaaa`},
-	{`^(?:(?:a+)*)$`, onePass, `a`},
-	{`^(?:(?:(?:a*)+))$`, onePass, noStr},
-	{`^(?:(?:a+)*)$`, onePass, ``},
-	{`^[a-c]+$`, onePass, `abc`},
-	{`^[a-c]*$`, onePass, `abcdabc`},
-	{`^(?:a*)$`, onePass, `aaaaaaa`},
-	{`^(?:(?:aa)|a)$`, onePass, `a`},
-	{`^[a-c]*`, NotOnePass, `abcdabc`},
-	{`^[a-c]*$`, onePass, `abc`},
-	{`^...$`, onePass, ``},
-	{`^(?:a|(?:aa))$`, onePass, `a`},
-	{`^[a-c]*`, NotOnePass, `abcabc`},
-	{`^a((b))c$`, onePass, noStr},
-	{`^a.[l-nA-Cg-j]?e$`, onePass, noStr},
-	{`^a((b))$`, onePass, noStr},
-	{`^a(?:(b)|(c))c$`, onePass, noStr},
-	{`^a(?:(b*)|(c))c$`, NotOnePass, noStr},
-	{`^a(?:b|c)$`, onePass, noStr},
-	{`^a(?:b?|c)$`, onePass, noStr},
-	{`^a(?:b?|c?)$`, NotOnePass, noStr},
-	{`^a(?:b?|c+)$`, onePass, noStr},
-	{`^a(?:b+|(bc))d$`, NotOnePass, noStr},
-	{`^a(?:bc)+$`, onePass, noStr},
-	{`^a(?:[bcd])+$`, onePass, noStr},
-	{`^a((?:[bcd])+)$`, onePass, noStr},
-	{`^a(:?b|c)*d$`, onePass, `abbbccbbcbbd"`},
-	{`^.bc(d|e)*$`, onePass, `abcddddddeeeededd`},
-	{`^(?:(?:aa)|.)$`, NotOnePass, `a`},
-	{`^(?:(?:a{1,2}){1,2})$`, NotOnePass, `aaaa`},
-}
-
-func TestCompileOnePass(t *testing.T) {
-	var (
-		p   *Prog
-		re  *Regexp
-		err error
-	)
-	for _, test := range onePassTests {
-		if re, err = Parse(test.re, Perl); err != nil {
-			t.Errorf("Parse(%q) got err:%s, want success", test.re, err)
-			continue
-		}
-		// needs to be done before compile...
-		re = re.Simplify()
-		if p, err = Compile(re); err != nil {
-			t.Errorf("Compile(%q) got err:%s, want success", test.re, err)
-			continue
-		}
-		onePass = p.CompileOnePass()
-		if (onePass == NotOnePass) != (test.onePass == NotOnePass) {
-			t.Errorf("CompileOnePass(%q) got %v, expected %v", test.re, onePass, test.onePass)
-		}
-	}
-}
commit	6c10e64a906039678f5dffda8b453005ffcdb92e	[log] [tgz]
author	Russ Cox <rsc@golang.org>	Wed May 28 14:08:44 2014 -0400
committer	Russ Cox <rsc@golang.org>	Wed May 28 14:08:44 2014 -0400
tree	0e7aa14f692e05d57180593035367aaad4a24a29
parent	0782ee3ad57a21bd3566f20e76e4e453613e7a23 [diff]