| // Copyright 2015 The Go Authors. All rights reserved. | 
 | // Use of this source code is governed by a BSD-style | 
 | // license that can be found in the LICENSE file. | 
 |  | 
 | // backtrack is a regular expression search with submatch | 
 | // tracking for small regular expressions and texts. It allocates | 
 | // a bit vector with (length of input) * (length of prog) bits, | 
 | // to make sure it never explores the same (character position, instruction) | 
 | // state multiple times. This limits the search to run in time linear in | 
 | // the length of the test. | 
 | // | 
 | // backtrack is a fast replacement for the NFA code on small | 
 | // regexps when onepass cannot be used. | 
 |  | 
 | package regexp | 
 |  | 
 | import ( | 
 | 	"regexp/syntax" | 
 | 	"sync" | 
 | ) | 
 |  | 
 | // A job is an entry on the backtracker's job stack. It holds | 
 | // the instruction pc and the position in the input. | 
 | type job struct { | 
 | 	pc  uint32 | 
 | 	arg bool | 
 | 	pos int | 
 | } | 
 |  | 
 | const ( | 
 | 	visitedBits        = 32 | 
 | 	maxBacktrackProg   = 500        // len(prog.Inst) <= max | 
 | 	maxBacktrackVector = 256 * 1024 // bit vector size <= max (bits) | 
 | ) | 
 |  | 
 | // bitState holds state for the backtracker. | 
 | type bitState struct { | 
 | 	end      int | 
 | 	cap      []int | 
 | 	matchcap []int | 
 | 	jobs     []job | 
 | 	visited  []uint32 | 
 |  | 
 | 	inputs inputs | 
 | } | 
 |  | 
 | var bitStatePool sync.Pool | 
 |  | 
 | func newBitState() *bitState { | 
 | 	b, ok := bitStatePool.Get().(*bitState) | 
 | 	if !ok { | 
 | 		b = new(bitState) | 
 | 	} | 
 | 	return b | 
 | } | 
 |  | 
 | func freeBitState(b *bitState) { | 
 | 	b.inputs.clear() | 
 | 	bitStatePool.Put(b) | 
 | } | 
 |  | 
 | // maxBitStateLen returns the maximum length of a string to search with | 
 | // the backtracker using prog. | 
 | func maxBitStateLen(prog *syntax.Prog) int { | 
 | 	if !shouldBacktrack(prog) { | 
 | 		return 0 | 
 | 	} | 
 | 	return maxBacktrackVector / len(prog.Inst) | 
 | } | 
 |  | 
 | // shouldBacktrack reports whether the program is too | 
 | // long for the backtracker to run. | 
 | func shouldBacktrack(prog *syntax.Prog) bool { | 
 | 	return len(prog.Inst) <= maxBacktrackProg | 
 | } | 
 |  | 
 | // reset resets the state of the backtracker. | 
 | // end is the end position in the input. | 
 | // ncap is the number of captures. | 
 | func (b *bitState) reset(prog *syntax.Prog, end int, ncap int) { | 
 | 	b.end = end | 
 |  | 
 | 	if cap(b.jobs) == 0 { | 
 | 		b.jobs = make([]job, 0, 256) | 
 | 	} else { | 
 | 		b.jobs = b.jobs[:0] | 
 | 	} | 
 |  | 
 | 	visitedSize := (len(prog.Inst)*(end+1) + visitedBits - 1) / visitedBits | 
 | 	if cap(b.visited) < visitedSize { | 
 | 		b.visited = make([]uint32, visitedSize, maxBacktrackVector/visitedBits) | 
 | 	} else { | 
 | 		b.visited = b.visited[:visitedSize] | 
 | 		for i := range b.visited { | 
 | 			b.visited[i] = 0 | 
 | 		} | 
 | 	} | 
 |  | 
 | 	if cap(b.cap) < ncap { | 
 | 		b.cap = make([]int, ncap) | 
 | 	} else { | 
 | 		b.cap = b.cap[:ncap] | 
 | 	} | 
 | 	for i := range b.cap { | 
 | 		b.cap[i] = -1 | 
 | 	} | 
 |  | 
 | 	if cap(b.matchcap) < ncap { | 
 | 		b.matchcap = make([]int, ncap) | 
 | 	} else { | 
 | 		b.matchcap = b.matchcap[:ncap] | 
 | 	} | 
 | 	for i := range b.matchcap { | 
 | 		b.matchcap[i] = -1 | 
 | 	} | 
 | } | 
 |  | 
 | // shouldVisit reports whether the combination of (pc, pos) has not | 
 | // been visited yet. | 
 | func (b *bitState) shouldVisit(pc uint32, pos int) bool { | 
 | 	n := uint(int(pc)*(b.end+1) + pos) | 
 | 	if b.visited[n/visitedBits]&(1<<(n&(visitedBits-1))) != 0 { | 
 | 		return false | 
 | 	} | 
 | 	b.visited[n/visitedBits] |= 1 << (n & (visitedBits - 1)) | 
 | 	return true | 
 | } | 
 |  | 
 | // push pushes (pc, pos, arg) onto the job stack if it should be | 
 | // visited. | 
 | func (b *bitState) push(re *Regexp, pc uint32, pos int, arg bool) { | 
 | 	// Only check shouldVisit when arg is false. | 
 | 	// When arg is true, we are continuing a previous visit. | 
 | 	if re.prog.Inst[pc].Op != syntax.InstFail && (arg || b.shouldVisit(pc, pos)) { | 
 | 		b.jobs = append(b.jobs, job{pc: pc, arg: arg, pos: pos}) | 
 | 	} | 
 | } | 
 |  | 
 | // tryBacktrack runs a backtracking search starting at pos. | 
 | func (re *Regexp) tryBacktrack(b *bitState, i input, pc uint32, pos int) bool { | 
 | 	longest := re.longest | 
 |  | 
 | 	b.push(re, pc, pos, false) | 
 | 	for len(b.jobs) > 0 { | 
 | 		l := len(b.jobs) - 1 | 
 | 		// Pop job off the stack. | 
 | 		pc := b.jobs[l].pc | 
 | 		pos := b.jobs[l].pos | 
 | 		arg := b.jobs[l].arg | 
 | 		b.jobs = b.jobs[:l] | 
 |  | 
 | 		// Optimization: rather than push and pop, | 
 | 		// code that is going to Push and continue | 
 | 		// the loop simply updates ip, p, and arg | 
 | 		// and jumps to CheckAndLoop. We have to | 
 | 		// do the ShouldVisit check that Push | 
 | 		// would have, but we avoid the stack | 
 | 		// manipulation. | 
 | 		goto Skip | 
 | 	CheckAndLoop: | 
 | 		if !b.shouldVisit(pc, pos) { | 
 | 			continue | 
 | 		} | 
 | 	Skip: | 
 |  | 
 | 		inst := re.prog.Inst[pc] | 
 |  | 
 | 		switch inst.Op { | 
 | 		default: | 
 | 			panic("bad inst") | 
 | 		case syntax.InstFail: | 
 | 			panic("unexpected InstFail") | 
 | 		case syntax.InstAlt: | 
 | 			// Cannot just | 
 | 			//   b.push(inst.Out, pos, false) | 
 | 			//   b.push(inst.Arg, pos, false) | 
 | 			// If during the processing of inst.Out, we encounter | 
 | 			// inst.Arg via another path, we want to process it then. | 
 | 			// Pushing it here will inhibit that. Instead, re-push | 
 | 			// inst with arg==true as a reminder to push inst.Arg out | 
 | 			// later. | 
 | 			if arg { | 
 | 				// Finished inst.Out; try inst.Arg. | 
 | 				arg = false | 
 | 				pc = inst.Arg | 
 | 				goto CheckAndLoop | 
 | 			} else { | 
 | 				b.push(re, pc, pos, true) | 
 | 				pc = inst.Out | 
 | 				goto CheckAndLoop | 
 | 			} | 
 |  | 
 | 		case syntax.InstAltMatch: | 
 | 			// One opcode consumes runes; the other leads to match. | 
 | 			switch re.prog.Inst[inst.Out].Op { | 
 | 			case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL: | 
 | 				// inst.Arg is the match. | 
 | 				b.push(re, inst.Arg, pos, false) | 
 | 				pc = inst.Arg | 
 | 				pos = b.end | 
 | 				goto CheckAndLoop | 
 | 			} | 
 | 			// inst.Out is the match - non-greedy | 
 | 			b.push(re, inst.Out, b.end, false) | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstRune: | 
 | 			r, width := i.step(pos) | 
 | 			if !inst.MatchRune(r) { | 
 | 				continue | 
 | 			} | 
 | 			pos += width | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstRune1: | 
 | 			r, width := i.step(pos) | 
 | 			if r != inst.Rune[0] { | 
 | 				continue | 
 | 			} | 
 | 			pos += width | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstRuneAnyNotNL: | 
 | 			r, width := i.step(pos) | 
 | 			if r == '\n' || r == endOfText { | 
 | 				continue | 
 | 			} | 
 | 			pos += width | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstRuneAny: | 
 | 			r, width := i.step(pos) | 
 | 			if r == endOfText { | 
 | 				continue | 
 | 			} | 
 | 			pos += width | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstCapture: | 
 | 			if arg { | 
 | 				// Finished inst.Out; restore the old value. | 
 | 				b.cap[inst.Arg] = pos | 
 | 				continue | 
 | 			} else { | 
 | 				if 0 <= inst.Arg && inst.Arg < uint32(len(b.cap)) { | 
 | 					// Capture pos to register, but save old value. | 
 | 					b.push(re, pc, b.cap[inst.Arg], true) // come back when we're done. | 
 | 					b.cap[inst.Arg] = pos | 
 | 				} | 
 | 				pc = inst.Out | 
 | 				goto CheckAndLoop | 
 | 			} | 
 |  | 
 | 		case syntax.InstEmptyWidth: | 
 | 			flag := i.context(pos) | 
 | 			if !flag.match(syntax.EmptyOp(inst.Arg)) { | 
 | 				continue | 
 | 			} | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstNop: | 
 | 			pc = inst.Out | 
 | 			goto CheckAndLoop | 
 |  | 
 | 		case syntax.InstMatch: | 
 | 			// We found a match. If the caller doesn't care | 
 | 			// where the match is, no point going further. | 
 | 			if len(b.cap) == 0 { | 
 | 				return true | 
 | 			} | 
 |  | 
 | 			// Record best match so far. | 
 | 			// Only need to check end point, because this entire | 
 | 			// call is only considering one start position. | 
 | 			if len(b.cap) > 1 { | 
 | 				b.cap[1] = pos | 
 | 			} | 
 | 			if old := b.matchcap[1]; old == -1 || (longest && pos > 0 && pos > old) { | 
 | 				copy(b.matchcap, b.cap) | 
 | 			} | 
 |  | 
 | 			// If going for first match, we're done. | 
 | 			if !longest { | 
 | 				return true | 
 | 			} | 
 |  | 
 | 			// If we used the entire text, no longer match is possible. | 
 | 			if pos == b.end { | 
 | 				return true | 
 | 			} | 
 |  | 
 | 			// Otherwise, continue on in hope of a longer match. | 
 | 			continue | 
 | 		} | 
 | 	} | 
 |  | 
 | 	return longest && len(b.matchcap) > 1 && b.matchcap[1] >= 0 | 
 | } | 
 |  | 
 | // backtrack runs a backtracking search of prog on the input starting at pos. | 
 | func (re *Regexp) backtrack(ib []byte, is string, pos int, ncap int, dstCap []int) []int { | 
 | 	startCond := re.cond | 
 | 	if startCond == ^syntax.EmptyOp(0) { // impossible | 
 | 		return nil | 
 | 	} | 
 | 	if startCond&syntax.EmptyBeginText != 0 && pos != 0 { | 
 | 		// Anchored match, past beginning of text. | 
 | 		return nil | 
 | 	} | 
 |  | 
 | 	b := newBitState() | 
 | 	i, end := b.inputs.init(nil, ib, is) | 
 | 	b.reset(re.prog, end, ncap) | 
 |  | 
 | 	// Anchored search must start at the beginning of the input | 
 | 	if startCond&syntax.EmptyBeginText != 0 { | 
 | 		if len(b.cap) > 0 { | 
 | 			b.cap[0] = pos | 
 | 		} | 
 | 		if !re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { | 
 | 			freeBitState(b) | 
 | 			return nil | 
 | 		} | 
 | 	} else { | 
 |  | 
 | 		// Unanchored search, starting from each possible text position. | 
 | 		// Notice that we have to try the empty string at the end of | 
 | 		// the text, so the loop condition is pos <= end, not pos < end. | 
 | 		// This looks like it's quadratic in the size of the text, | 
 | 		// but we are not clearing visited between calls to TrySearch, | 
 | 		// so no work is duplicated and it ends up still being linear. | 
 | 		width := -1 | 
 | 		for ; pos <= end && width != 0; pos += width { | 
 | 			if len(re.prefix) > 0 { | 
 | 				// Match requires literal prefix; fast search for it. | 
 | 				advance := i.index(re, pos) | 
 | 				if advance < 0 { | 
 | 					freeBitState(b) | 
 | 					return nil | 
 | 				} | 
 | 				pos += advance | 
 | 			} | 
 |  | 
 | 			if len(b.cap) > 0 { | 
 | 				b.cap[0] = pos | 
 | 			} | 
 | 			if re.tryBacktrack(b, i, uint32(re.prog.Start), pos) { | 
 | 				// Match must be leftmost; done. | 
 | 				goto Match | 
 | 			} | 
 | 			_, width = i.step(pos) | 
 | 		} | 
 | 		freeBitState(b) | 
 | 		return nil | 
 | 	} | 
 |  | 
 | Match: | 
 | 	dstCap = append(dstCap, b.matchcap...) | 
 | 	freeBitState(b) | 
 | 	return dstCap | 
 | } |