cmd/greplogs/internal/logparse/failure.go - build - Git at Google

 // Copyright 2015 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package logparse

 import (
 	"fmt"
 	"regexp"
 	"strconv"
 	"strings"
 	"sync"
 )

 // Failure describes a single failure extracted from an all.bash log.
 type Failure struct {
 	// Package is the Go package the failure belongs to. For a
 	// testing.T failure it is the package containing the test.
 	Package string

 	// Test names the failing test function. It is "" when the
 	// failure did not come from a testing.T.
 	Test string

 	// Message is a one-line summary of the failure.
 	Message string

 	// FullMessage is the piece of the log that covers the whole
 	// failure message. It can span many lines.
 	FullMessage string

 	// Function is the fully qualified name of the function in
 	// which the failure occurred, when known. It helps tell
 	// apart generic errors such as "out of bounds", and it is a
 	// more stable matching key than a file/line pair.
 	Function string

 	// File is the source file in which the failure occurred,
 	// when known.
 	File string

 	// Line is the source line at which the failure occurred,
 	// when known.
 	Line int

 	// OS is the GOOS of this failure.
 	OS string

 	// Arch is the GOARCH of this failure.
 	Arch string
 }

 // String renders f as "Package.Test at WHERE: Message", omitting every
 // part that is empty. WHERE is the function name when one is known;
 // otherwise it is the file name, followed by ":Line" when Line is
 // non-zero.
 func (f Failure) String() string {
 	prefix := f.Package
 	if f.Test != "" {
 		prefix += "." + f.Test
 	}

 	// Work out the location. The function name wins over
 	// file/line when both are present.
 	var where string
 	switch {
 	case f.Function != "":
 		where = "at " + f.Function
 	case f.File != "":
 		where = "at " + f.File
 		if f.Line != 0 {
 			where += fmt.Sprintf(":%d", f.Line)
 		}
 	}

 	if where != "" {
 		if prefix != "" {
 			prefix += " "
 		}
 		prefix += where
 	}

 	// Only emit the ": " separator when something precedes the
 	// message.
 	if prefix == "" {
 		return f.Message
 	}
 	return prefix + ": " + f.Message
 }

 // canonicalMessage returns f.Message with every "word" that contains
 // an ASCII digit replaced by "…", so that messages differing only in
 // numeric details map to the same string. A message without any
 // ASCII digit is returned unchanged.
 func (f *Failure) canonicalMessage() string {
 	// Cheap pre-check: only run the regexp when the message
 	// contains at least one ASCII digit.
 	hasDigit := false
 	for _, r := range f.Message {
 		if r >= '0' && r <= '9' {
 			hasDigit = true
 			break
 		}
 	}
 	if !hasDigit {
 		return f.Message
 	}

 	// Collapse each digit-bearing word.
 	//
 	// TODO: "Escape" any existing … to make this safe as a key
 	// for later use with canonicalFields (direct use is
 	// unimportant).
 	return numberWords.ReplaceAllString(f.Message, "…")
 }

 // numberWords matches a run of letters and digits that contains at
 // least one digit. It exists to canonicalize the numeric fields of
 // error messages, so any Unicode letter is accepted but only the
 // digits 0-9 count as digits. The whole word is matched so that
 // things like hexadecimal values and temporary file names are
 // replaced in one piece.
 var numberWords = regexp.MustCompile(`\pL*[0-9][\pL0-9]*`)

 // Regular expressions (and regexp source fragments) used by Extract
 // to recognize the different kinds of failure in a log. Plain string
 // values are fragments that are concatenated into the compiled
 // patterns below.
 var (
 	// linesStar lazily matches zero or more complete lines;
 	// linesPlus lazily matches one or more complete lines.
 	linesStar = `(?:.*\n)*?`
 	linesPlus = `(?:.*\n)+?`

 	// failPkg matches the FAIL line for a package. Its single
 	// capture group is the package name.
 	//
 	// In case of failure the Android wrapper prints "exitcode=1" without a newline,
 	// so for logs prior to the fix for https://golang.org/issue/49317 we need to
 	// strip that from the beginning of the line.
 	failPkg = `(?m:^(?:exitcode=1)?FAIL[ \t]+(\S+))`

 	// logTruncated matches the "log truncated" line injected by the coordinator.
 	logTruncated = `(?:\n\.\.\. log truncated \.\.\.)`

 	// endOfTest matches whatever ends a test's output: either
 	// the package FAIL line (failPkg) or the truncation marker
 	// (logTruncated). The package capture group is empty when
 	// the truncation alternative matched.
 	endOfTest = `(?:` + failPkg + `|` + logTruncated + `)`

 	// canonLine matches a line ending made of one or more \r
 	// followed by \n. Extract rewrites each match to "\n".
 	canonLine = regexp.MustCompile(`\r+\n`)

 	// testingHeader matches the beginning of the go test std
 	// section. On Plan 9 there used to be just one #.
 	testingHeader = regexp.MustCompile(`^#+ Testing packages`)

 	// sectionHeader matches the header of each testing section
 	// printed by go tool dist test. The capture group is the
 	// section title.
 	sectionHeader = regexp.MustCompile(`^##### (.*)`)

 	// testingFailed matches a testing.T failure. This may be a
 	// T.Error or a recovered panic. There was a time when the
 	// test name included GOMAXPROCS (like how benchmark names
 	// do), so we strip that out.
 	//
 	// Group 1 is the test name, group 2 the lines between the
 	// "--- FAIL" line and the end of the test, and group 3 the
 	// package from the FAIL line.
 	testingFailed = regexp.MustCompile(`^--- FAIL: ([^-\s]+).*\n(` + linesStar + `)` + endOfTest)

 	// testingError matches the file name and message of the last
 	// T.Error in a testingFailed log. Groups 1-3 are the file
 	// name, line number and message.
 	testingError = regexp.MustCompile(`(?:.*\n)*\t([^:]+):([0-9]+): (.*)\n`)

 	// testingPanic matches a recovered panic in a testingFailed
 	// log. The capture group is the panic message.
 	testingPanic = regexp.MustCompile(`panic: (.*?)(?: \[recovered\])`)

 	// gotestFailed matches a $GOROOT/test failure. Group 1 is
 	// the output following the "# go run run.go" line and group
 	// 2 the package from the FAIL line.
 	gotestFailed = regexp.MustCompile(`^# go run run\.go.*\n(` + linesPlus + `)` + endOfTest)

 	// buildFailed matches build failures from the testing package.
 	buildFailed = regexp.MustCompile(`^` + failPkg + `\s+\[build failed\]`)

 	// timeoutPanic1 matches a test timeout detected by the testing package.
 	// Group 1 is the output after the panic line and group 2
 	// the package from the FAIL line.
 	timeoutPanic1 = regexp.MustCompile(`^panic: test timed out after .*\n(` + linesStar + `)` + endOfTest)

 	// timeoutPanic2 matches a test timeout detected by go test.
 	// Group 1 is the package from the FAIL line.
 	timeoutPanic2 = regexp.MustCompile(`^\*\*\* Test killed.*ran too long\n` + endOfTest)

 	// coordinatorTimeout matches a test timeout detected by the
 	// coordinator, for both non-sharded and sharded tests.
 	coordinatorTimeout = regexp.MustCompile(`(?m)^Build complete.*Result: error: timed out|^Test "[^"]+" ran over [0-9a-z]+ limit`)

 	// tbEntry is a regexp string that matches a single
 	// function/line number entry in a traceback. Group 1 matches
 	// the fully qualified function name. Groups 2 and 3 match the
 	// file name and line number.
 	// Most entries have trailing stack metadata for each frame,
 	// but inlined calls, lacking a frame, may omit that metadata.
 	tbEntry = `(\S+)\(.*\)\n\t(.*):([0-9]+)(?: .*)?\n`

 	// runtimeFailed matches a runtime throw or testing package
 	// panic. Matching the panic is fairly loose because in some
 	// cases a "fatal error:" can be preceded by a "panic:" if
 	// we've started the panic and then realize we can't (e.g.,
 	// sigpanic). Also gather up the "runtime:" prints preceding a
 	// throw.
 	//
 	// runtimeLiterals lists the literal strings Extract looks for
 	// on the current line (via matcher.lineHasLiteral) before it
 	// tries runtimeFailed at all.
 	//
 	// runtimeFailedTrailer matches what may follow the traceback
 	// of a runtime failure: an optional "exit status" line, an
 	// optional "*** Test killed" line, and an optional endOfTest.
 	runtimeFailed        = regexp.MustCompile(`^(?:runtime:.*\n)*.*(?:panic: |fatal error: )(.*)`)
 	runtimeLiterals      = []string{"runtime:", "panic:", "fatal error:"}
 	runtimeFailedTrailer = regexp.MustCompile(`^(?:exit status.*\n)?(?:\*\*\* Test killed.*\n)?` + endOfTest + `?`)

 	// apiCheckerFailed matches an API checker failure. The
 	// capture group is the error text.
 	apiCheckerFailed = regexp.MustCompile(`^Error running API checker: (.*)`)

 	// goodLine matches known-good lines so we can ignore them
 	// before doing more aggressive/fuzzy failure extraction.
 	goodLine = regexp.MustCompile(`^#|^ok\s|^\?\s|^Benchmark|^PASS|^=== |^--- `)

 	// testingUnknownFailed matches the last line of some unknown
 	// failure detected by the testing package.
 	testingUnknownFailed = regexp.MustCompile(`^` + endOfTest)

 	// miscFailed matches the log.Fatalf in go tool dist test when
 	// a test fails. We use this as a last resort, mostly to pick
 	// up failures in sections that don't use the testing package.
 	miscFailed = regexp.MustCompile(`^.*Failed: (?:exit status|test failed)`)
 )

 // An extractCache speeds up failure extraction from multiple logs by
 // caching known lines. It is *not* thread-safe, so instances are
 // handed out through extractCachePool.
 type extractCache struct {
 	// boringLines records lines that need no regexp matching.
 	// Extract stores true for a line matched by goodLine and
 	// false for a line that matched none of its patterns.
 	boringLines map[string]bool
 }

 // extractCachePool holds reusable extractCache values. New is set in
 // the variable's initializer rather than in an init function so the
 // pool is already usable during package variable initialization and
 // from init functions in other files of the package.
 var extractCachePool = sync.Pool{
 	New: func() interface{} {
 		return &extractCache{boringLines: make(map[string]bool)}
 	},
 }

 // Extract parses the failures from all.bash log m.
 //
 // os and arch are recorded as the OS and Arch of every returned
 // Failure. The log is scanned from the start; at each position the
 // first matching pattern wins, and lines that match nothing are
 // collected as "unknown" until the next recognized line. If no
 // failure is found this way, a few whole-log heuristics are tried.
 // Failures that repeat across many packages are collapsed into one.
 //
 // The returned error is currently always nil.
 func Extract(m string, os, arch string) ([]*Failure, error) {
 	// systemicThreshold is the number of distinct packages a
 	// (message, where) pair must appear in before it is treated
 	// as a systemic issue and collapsed.
 	const systemicThreshold = 5

 	fs := []*Failure{}
 	testingStarted := false
 	section := ""
 	sectionHeaderFailures := 0 // # failures at section start
 	unknown := []string{}
 	cache := extractCachePool.Get().(*extractCache)
 	defer extractCachePool.Put(cache)

 	// Canonicalize line endings. Note that some logs have a mix
 	// of line endings and some somehow have multiple \r's.
 	m = canonLine.ReplaceAllString(m, "\n")

 	// s holds the submatches of the most recent consume call.
 	var s []string
 	matcher := newMatcher(m)
 	consume := func(r *regexp.Regexp) bool {
 		matched := matcher.consume(r)
 		s = matcher.groups
 		if matched && !strings.HasSuffix(s[0], "\n") {
 			// Consume the rest of the line.
 			matcher.line()
 		}
 		return matched
 	}
 	// firstBadLine returns the first non-empty unknown line
 	// seen since the last known line, or "".
 	firstBadLine := func() string {
 		for _, u := range unknown {
 			if len(u) > 0 {
 				return u
 			}
 		}
 		return ""
 	}

 	for !matcher.done() {
 		// Check for a cached result.
 		line, nextLinePos := matcher.peekLine()
 		isGoodLine, cached := cache.boringLines[line]

 		// Process the line.
 		isKnown := true
 		switch {
 		case cached:
 			matcher.pos = nextLinePos
 			if !isGoodLine {
 				// This line is known to not match any
 				// regexps. Follow the default case.
 				isKnown = false
 				unknown = append(unknown, line)
 			}

 		case consume(testingHeader):
 			testingStarted = true

 		case consume(sectionHeader):
 			section = s[1]
 			sectionHeaderFailures = len(fs)

 		case consume(testingFailed):
 			f := &Failure{
 				Test:        s[1],
 				Package:     s[3],
 				FullMessage: s[0],
 				Message:     "unknown testing.T failure",
 			}

 			// TODO: Can have multiple errors per FAIL:
 			// ../fetchlogs/rev/2015-03-24T19:51:21-41f9c43/linux-arm64-canonical

 			// Prefer the last T.Error in the test output;
 			// fall back to a recovered panic.
 			sError := testingError.FindStringSubmatch(s[2])
 			sPanic := testingPanic.FindStringSubmatch(s[2])
 			if sError != nil {
 				f.File, f.Line, f.Message = sError[1], atoi(sError[2]), sError[3]
 			} else if sPanic != nil {
 				f.Function, f.File, f.Line = panicWhere(s[2])
 				f.Message = sPanic[1]
 			}

 			fs = append(fs, f)

 		case consume(gotestFailed):
 			fs = append(fs, &Failure{
 				Package:     "test/" + s[2],
 				FullMessage: s[0],
 				Message:     firstLine(s[1]),
 			})

 		case consume(buildFailed):
 			// This may have an accompanying compiler
 			// crash, but it's interleaved with other "ok"
 			// lines, so it's hard to find.
 			fs = append(fs, &Failure{
 				FullMessage: s[0],
 				Message:     "build failed",
 				Package:     s[1],
 			})

 		case consume(timeoutPanic1):
 			fs = append(fs, &Failure{
 				Test:        testFromTraceback(s[1]),
 				FullMessage: s[0],
 				Message:     "test timed out",
 				Package:     s[2],
 			})

 		case consume(timeoutPanic2):
 			// The traceback, if any, was printed before
 			// the "Test killed" line and so sits in the
 			// accumulated unknown lines.
 			tb := strings.Join(unknown, "\n")
 			fs = append(fs, &Failure{
 				Test:        testFromTraceback(tb),
 				FullMessage: tb + "\n" + s[0],
 				Message:     "test timed out",
 				Package:     s[1],
 			})

 		case matcher.lineHasLiteral(runtimeLiterals...) && consume(runtimeFailed):
 			start := matcher.matchPos
 			msg := s[1]
 			pkg := "testing"
 			if strings.Contains(s[0], "fatal error:") {
 				pkg = "runtime"
 			}
 			traceback := consumeTraceback(matcher)
 			matcher.consume(runtimeFailedTrailer)
 			fn, file, line := panicWhere(traceback)
 			fs = append(fs, &Failure{
 				Package:     pkg,
 				FullMessage: matcher.str[start:matcher.pos],
 				Message:     msg,
 				Function:    fn,
 				File:        file,
 				Line:        line,
 			})

 		case consume(apiCheckerFailed):
 			fs = append(fs, &Failure{
 				Package:     "API checker",
 				FullMessage: s[0],
 				Message:     s[1],
 			})

 		case consume(goodLine):
 			// Ignore. Just cache and clear unknown.
 			cache.boringLines[line] = true

 		case consume(testingUnknownFailed):
 			fs = append(fs, &Failure{
 				Package:     s[1],
 				FullMessage: s[0],
 				Message:     "unknown failure: " + firstBadLine(),
 			})

 		case len(fs) == sectionHeaderFailures && consume(miscFailed):
 			// Last resort: only used when nothing else
 			// was reported for the current section.
 			fs = append(fs, &Failure{
 				Package:     section,
 				FullMessage: s[0],
 				Message:     "unknown failure: " + firstBadLine(),
 			})

 		default:
 			isKnown = false
 			unknown = append(unknown, line)
 			cache.boringLines[line] = false
 			matcher.pos = nextLinePos
 		}

 		// Clear unknown lines on any known line.
 		if isKnown {
 			unknown = unknown[:0]
 		}
 	}

 	// Whole-log fallbacks, tried in order and only while no
 	// failure has been found.
 	//
 	// TODO: FullMessages for these.
 	if len(fs) == 0 && strings.Contains(m, "no space left on device") {
 		fs = append(fs, &Failure{
 			Message: "build failed (no space left on device)",
 		})
 	}
 	if len(fs) == 0 && coordinatorTimeout.MatchString(m) {
 		// all.bash was killed by coordinator.
 		fs = append(fs, &Failure{
 			Message: "build failed (timed out)",
 		})
 	}
 	if len(fs) == 0 && strings.Contains(m, "Failed to schedule") {
 		// Test sharding failed.
 		fs = append(fs, &Failure{
 			Message: "build failed (failed to schedule)",
 		})
 	}
 	if len(fs) == 0 && strings.Contains(m, "nosplit stack overflow") {
 		fs = append(fs, &Failure{
 			Message: "build failed (nosplit stack overflow)",
 		})
 	}

 	// If the same (message, where) shows up in systemicThreshold
 	// or more packages, it's probably a systemic issue, so
 	// collapse it down to one failure with no package.
 	type dedup struct {
 		packages map[string]bool
 		kept     bool
 	}
 	msgDedup := map[Failure]*dedup{}
 	failureMap := map[*Failure]*dedup{}
 	maxCount := 0
 	for _, f := range fs {
 		key := Failure{
 			Message:  f.canonicalMessage(),
 			Function: f.Function,
 			File:     f.File,
 			Line:     f.Line,
 		}

 		d := msgDedup[key]
 		if d == nil {
 			d = &dedup{packages: map[string]bool{}}
 			msgDedup[key] = d
 		}
 		d.packages[f.Package] = true
 		if len(d.packages) > maxCount {
 			maxCount = len(d.packages)
 		}
 		failureMap[f] = d
 	}
 	if maxCount >= systemicThreshold {
 		fsn := []*Failure{}
 		for _, f := range fs {
 			d := failureMap[f]
 			if len(d.packages) < systemicThreshold {
 				fsn = append(fsn, f)
 			} else if !d.kept {
 				// Keep only the first failure of a
 				// systemic group, stripped of its
 				// test and package.
 				d.kept = true
 				f.Test, f.Package = "", ""
 				fsn = append(fsn, f)
 			}
 		}
 		fs = fsn
 	}

 	// Check if we even got as far as testing. Note that there was
 	// a period when we didn't print the "testing" header, so as
 	// long as we found failures, we don't care if we found the
 	// header.
 	if !testingStarted && len(fs) == 0 {
 		fs = append(fs, &Failure{
 			Message: "toolchain build failed",
 		})
 	}

 	for _, f := range fs {
 		f.OS, f.Arch = os, arch

 		// Clean up package. For misc/cgo tests, this will be
 		// something like
 		// _/tmp/buildlet-scatch825855615/go/misc/cgo/test.
 		// Drop the first three path elements. A package with
 		// fewer elements (e.g. "_/tmp/x") is left untouched
 		// instead of indexing past the end of the split.
 		if strings.HasPrefix(f.Package, "_/tmp/") {
 			if parts := strings.SplitN(f.Package, "/", 4); len(parts) == 4 {
 				f.Package = parts[3]
 			}
 		}

 		// Trim trailing newlines from FullMessage.
 		f.FullMessage = strings.TrimRight(f.FullMessage, "\n")
 	}
 	return fs, nil
 }

 // atoi converts the decimal string s to an int. It panics with
 // "expected number, got <s>" when s cannot be converted; the callers
 // in this file only pass text already matched by a [0-9]+ group.
 func atoi(s string) int {
 	n, err := strconv.Atoi(s)
 	if err == nil {
 		return n
 	}
 	panic("expected number, got " + s)
 }

 // firstLine returns s up to, but not including, its first "\n". When
 // s contains no newline, all of s is returned.
 func firstLine(s string) string {
 	end := strings.IndexByte(s, '\n')
 	if end < 0 {
 		return s
 	}
 	return s[:end]
 }

 var (
 	// tracebackStart matches the header line that opens a
 	// traceback: either "goroutine N ...:" or "runtime stack:".
 	tracebackStart = regexp.MustCompile(`^(goroutine [0-9]+.*:|runtime stack:)\n`)
 	// tracebackEntry matches one function/file:line entry (see
 	// tbEntry) at the current position.
 	tracebackEntry = regexp.MustCompile(`^` + tbEntry)
 )

 // consumeTraceback advances m past a traceback and returns the
 // traceback's text. Lines before the first traceback header are
 // skipped and are not part of the result.
 func consumeTraceback(m *matcher) string {
 	// Skip ahead, one line at a time, to the traceback header.
 	for {
 		if m.done() || m.peek(tracebackStart) {
 			break
 		}
 		m.line()
 	}

 	begin := m.pos
 	for !m.done() {
 		// Blank lines, tab-indented lines, goroutine and
 		// runtime stack headers, and "created by" lines all
 		// belong to the traceback.
 		structural := m.hasPrefix("\n") || m.hasPrefix("\t") ||
 			m.hasPrefix("goroutine ") || m.hasPrefix("runtime stack:") ||
 			m.hasPrefix("created by ")
 		if structural {
 			m.line()
 			continue
 		}

 		// Anything else must be a function/file entry;
 		// otherwise the traceback has ended.
 		if !m.consume(tracebackEntry) {
 			break
 		}
 	}
 	return m.str[begin:m.pos]
 }

 var (
 	// testFromTracebackRe matches a traceback entry whose
 	// function name starts with Test and whose following line
 	// names a *_test.go file. The capture group is the test
 	// name. The name must be followed by "(", so "created by"
 	// lines without an argument list do not match.
 	testFromTracebackRe = regexp.MustCompile(`\.(Test[^(\n]+)\(.*\n.*_test\.go`)

 	// panicWhereRe matches a traceback entry (see tbEntry) that
 	// starts at the beginning of any line.
 	panicWhereRe = regexp.MustCompile(`(?m:^)` + tbEntry)
 )

 // testFromTraceback returns the name of the first test function
 // found in traceback tb, or "" when tb mentions none.
 func testFromTraceback(tb string) string {
 	if groups := testFromTracebackRe.FindStringSubmatch(tb); groups != nil {
 		return groups[1]
 	}
 	return ""
 }

 // panicWhere scans traceback tb for the first entry that is not part
 // of the panic machinery and returns that entry's fully qualified
 // function name, source file and line number. It returns "", "", 0
 // when tb contains no such entry.
 func panicWhere(tb string) (fn string, file string, line int) {
 	m := matcher{str: tb}
 	for m.consume(panicWhereRe) {
 		name := m.groups[1]

 		// Frames of the panic handling itself say nothing
 		// about where the failure originated; skip them.
 		isPanicFrame := name == "runtime.throw" ||
 			name == "runtime.sigpanic" ||
 			strings.HasPrefix(name, "runtime.panic")
 		if !isPanicFrame {
 			return name, m.groups[2], atoi(m.groups[3])
 		}
 	}
 	return "", "", 0
 }
	// Copyright 2015 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package logparse

	import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"sync"
	)

	// Failure records a failure extracted from an all.bash log.
	type Failure struct {
	// Package is the Go package of this failure. In the case of a
	// testing.T failure, this will be the package of the test.
	Package string

	// Test identifies the failed test function. If this is not a
	// testing.T failure, this will be "".
	Test string

	// Message is the summarized failure message. This will be one
	// line of text.
	Message string

	// FullMessage is a substring of the log that captures the
	// entire failure message. It may be many lines long.
	FullMessage string

	// Function is the fully qualified name of the function where
	// this failure happened, if known. This helps distinguish
	// between generic errors like "out of bounds" and is more
	// stable for matching errors than file/line.
	Function string

	// File is the source file where this failure happened, if
	// known.
	File string

	// Line is the source line where this failure happened, if
	// known.
	Line int

	// OS and Arch are the GOOS and GOARCH of this failure.
	OS, Arch string
	}

	func (f Failure) String() string {
	s := f.Package
	if f.Test != "" {
	s += "." + f.Test
	}
	if f.Function != "" \|\| f.File != "" {
	if s != "" {
	s += " "
	}
	if f.Function != "" {
	s += "at " + f.Function
	} else {
	s += "at " + f.File
	if f.Line != 0 {
	s += fmt.Sprintf(":%d", f.Line)
	}
	}
	}
	if s != "" {
	s += ": "
	}
	s += f.Message
	return s
	}

	func (f *Failure) canonicalMessage() string {
	// Do we need to do anything to the message?
	for _, c := range f.Message {
	if '0' <= c && c <= '9' {
	goto rewrite
	}
	}
	return f.Message

	rewrite:
	// Canonicalize any "word" of the message containing numbers.
	//
	// TODO: "Escape" any existing … to make this safe as a key
	// for later use with canonicalFields (direct use is
	// unimportant).
	return numberWords.ReplaceAllString(f.Message, "…")
	}

	// numberWords matches words that consist of both letters and
	// digits. Since this is meant to canonicalize numeric fields
	// of error messages, we accept any Unicode letter, but only
	// digits 0-9. We match the whole word to catch things like
	// hexadecimal and temporary file names.
	var numberWords = regexp.MustCompile(`\pL[0-9][\pL0-9]`)

	var (
	linesStar = `(?:.\n)?`
	linesPlus = `(?:.*\n)+?`

	// failPkg matches the FAIL line for a package.
	//
	// In case of failure the Android wrapper prints "exitcode=1" without a newline,
	// so for logs prior to the fix for https://golang.org/issue/49317 we need to
	// strip that from the beginning of the line.
	failPkg = `(?m:^(?:exitcode=1)?FAIL[ \t]+(\S+))`

	// logTruncated matches the "log truncated" line injected by the coordinator.
	logTruncated = `(?:\n\.\.\. log truncated \.\.\.)`

	endOfTest = `(?:` + failPkg + `\|` + logTruncated + `)`

	canonLine = regexp.MustCompile(`\r+\n`)

	// testingHeader matches the beginning of the go test std
	// section. On Plan 9 there used to be just one #.
	testingHeader = regexp.MustCompile(`^#+ Testing packages`)

	// sectionHeader matches the header of each testing section
	// printed by go tool dist test.
	sectionHeader = regexp.MustCompile(`^##### (.*)`)

	// testingFailed matches a testing.T failure. This may be a
	// T.Error or a recovered panic. There was a time when the
	// test name included GOMAXPROCS (like how benchmark names
	// do), so we strip that out.
	testingFailed = regexp.MustCompile(`^--- FAIL: ([^-\s]+).*\n(` + linesStar + `)` + endOfTest)

	// testingError matches the file name and message of the last
	// T.Error in a testingFailed log.
	testingError = regexp.MustCompile(`(?:.\n)\t([^:]+):([0-9]+): (.*)\n`)

	// testingPanic matches a recovered panic in a testingFailed
	// log.
	testingPanic = regexp.MustCompile(`panic: (.*?)(?: \[recovered\])`)

	// gotestFailed matches a $GOROOT/test failure.
	gotestFailed = regexp.MustCompile(`^# go run run\.go.*\n(` + linesPlus + `)` + endOfTest)

	// buildFailed matches build failures from the testing package.
	buildFailed = regexp.MustCompile(`^` + failPkg + `\s+\[build failed\]`)

	// timeoutPanic1 matches a test timeout detected by the testing package.
	timeoutPanic1 = regexp.MustCompile(`^panic: test timed out after .*\n(` + linesStar + `)` + endOfTest)

	// timeoutPanic2 matches a test timeout detected by go test.
	timeoutPanic2 = regexp.MustCompile(`^\\\* Test killed.*ran too long\n` + endOfTest)

	// coordinatorTimeout matches a test timeout detected by the
	// coordinator, for both non-sharded and sharded tests.
	coordinatorTimeout = regexp.MustCompile(`(?m)^Build complete.*Result: error: timed out\|^Test "[^"]+" ran over [0-9a-z]+ limit`)

	// tbEntry is a regexp string that matches a single
	// function/line number entry in a traceback. Group 1 matches
	// the fully qualified function name. Groups 2 and 3 match the
	// file name and line number.
	// Most entries have trailing stack metadata for each frame,
	// but inlined calls, lacking a frame, may omit that metadata.
	tbEntry = `(\S+)\(.\)\n\t(.):([0-9]+)(?: .*)?\n`

	// runtimeFailed matches a runtime throw or testing package
	// panic. Matching the panic is fairly loose because in some
	// cases a "fatal error:" can be preceded by a "panic:" if
	// we've started the panic and then realize we can't (e.g.,
	// sigpanic). Also gather up the "runtime:" prints preceding a
	// throw.
	runtimeFailed = regexp.MustCompile(`^(?:runtime:.\n).(?:panic: \|fatal error: )(.)`)
	runtimeLiterals = []string{"runtime:", "panic:", "fatal error:"}
	runtimeFailedTrailer = regexp.MustCompile(`^(?:exit status.\n)?(?:\\\ Test killed.*\n)?` + endOfTest + `?`)

	// apiCheckerFailed matches an API checker failure.
	apiCheckerFailed = regexp.MustCompile(`^Error running API checker: (.*)`)

	// goodLine matches known-good lines so we can ignore them
	// before doing more aggressive/fuzzy failure extraction.
	goodLine = regexp.MustCompile(`^#\|^ok\s\|^\?\s\|^Benchmark\|^PASS\|^=== \|^--- `)

	// testingUnknownFailed matches the last line of some unknown
	// failure detected by the testing package.
	testingUnknownFailed = regexp.MustCompile(`^` + endOfTest)

	// miscFailed matches the log.Fatalf in go tool dist test when
	// a test fails. We use this as a last resort, mostly to pick
	// up failures in sections that don't use the testing package.
	miscFailed = regexp.MustCompile(`^.*Failed: (?:exit status\|test failed)`)
	)

	// An extractCache speeds up failure extraction from multiple logs by
	// caching known lines. It is not thread-safe, so we track it in a
	// sync.Pool.
	type extractCache struct {
	boringLines map[string]bool
	}

	var extractCachePool sync.Pool

	func init() {
	extractCachePool.New = func() interface{} {
	return &extractCache{make(map[string]bool)}
	}
	}

	// Extract parses the failures from all.bash log m.
	func Extract(m string, os, arch string) ([]*Failure, error) {
	fs := []*Failure{}
	testingStarted := false
	section := ""
	sectionHeaderFailures := 0 // # failures at section start
	unknown := []string{}
	cache := extractCachePool.Get().(*extractCache)
	defer extractCachePool.Put(cache)

	// Canonicalize line endings. Note that some logs have a mix
	// of line endings and some somehow have multiple \r's.
	m = canonLine.ReplaceAllString(m, "\n")

	var s []string
	matcher := newMatcher(m)
	consume := func(r *regexp.Regexp) bool {
	matched := matcher.consume(r)
	s = matcher.groups
	if matched && !strings.HasSuffix(s[0], "\n") {
	// Consume the rest of the line.
	matcher.line()
	}
	return matched
	}
	firstBadLine := func() string {
	for _, u := range unknown {
	if len(u) > 0 {
	return u
	}
	}
	return ""
	}

	for !matcher.done() {
	// Check for a cached result.
	line, nextLinePos := matcher.peekLine()
	isGoodLine, cached := cache.boringLines[line]

	// Process the line.
	isKnown := true
	switch {
	case cached:
	matcher.pos = nextLinePos
	if !isGoodLine {
	// This line is known to not match any
	// regexps. Follow the default case.
	isKnown = false
	unknown = append(unknown, line)
	}

	case consume(testingHeader):
	testingStarted = true

	case consume(sectionHeader):
	section = s[1]
	sectionHeaderFailures = len(fs)

	case consume(testingFailed):
	f := &Failure{
	Test: s[1],
	Package: s[3],
	FullMessage: s[0],
	Message: "unknown testing.T failure",
	}

	// TODO: Can have multiple errors per FAIL:
	// ../fetchlogs/rev/2015-03-24T19:51:21-41f9c43/linux-arm64-canonical

	sError := testingError.FindStringSubmatch(s[2])
	sPanic := testingPanic.FindStringSubmatch(s[2])
	if sError != nil {
	f.File, f.Line, f.Message = sError[1], atoi(sError[2]), sError[3]
	} else if sPanic != nil {
	f.Function, f.File, f.Line = panicWhere(s[2])
	f.Message = sPanic[1]
	}

	fs = append(fs, f)

	case consume(gotestFailed):
	fs = append(fs, &Failure{
	Package: "test/" + s[2],
	FullMessage: s[0],
	Message: firstLine(s[1]),
	})

	case consume(buildFailed):
	// This may have an accompanying compiler
	// crash, but it's interleaved with other "ok"
	// lines, so it's hard to find.
	fs = append(fs, &Failure{
	FullMessage: s[0],
	Message: "build failed",
	Package: s[1],
	})

	case consume(timeoutPanic1):
	fs = append(fs, &Failure{
	Test: testFromTraceback(s[1]),
	FullMessage: s[0],
	Message: "test timed out",
	Package: s[2],
	})

	case consume(timeoutPanic2):
	tb := strings.Join(unknown, "\n")
	fs = append(fs, &Failure{
	Test: testFromTraceback(tb),
	FullMessage: tb + "\n" + s[0],
	Message: "test timed out",
	Package: s[1],
	})

	case matcher.lineHasLiteral(runtimeLiterals...) && consume(runtimeFailed):
	start := matcher.matchPos
	msg := s[1]
	pkg := "testing"
	if strings.Contains(s[0], "fatal error:") {
	pkg = "runtime"
	}
	traceback := consumeTraceback(matcher)
	matcher.consume(runtimeFailedTrailer)
	fn, file, line := panicWhere(traceback)
	fs = append(fs, &Failure{
	Package: pkg,
	FullMessage: matcher.str[start:matcher.pos],
	Message: msg,
	Function: fn,
	File: file,
	Line: line,
	})

	case consume(apiCheckerFailed):
	fs = append(fs, &Failure{
	Package: "API checker",
	FullMessage: s[0],
	Message: s[1],
	})

	case consume(goodLine):
	// Ignore. Just cache and clear unknown.
	cache.boringLines[line] = true

	case consume(testingUnknownFailed):
	fs = append(fs, &Failure{
	Package: s[1],
	FullMessage: s[0],
	Message: "unknown failure: " + firstBadLine(),
	})

	case len(fs) == sectionHeaderFailures && consume(miscFailed):
	fs = append(fs, &Failure{
	Package: section,
	FullMessage: s[0],
	Message: "unknown failure: " + firstBadLine(),
	})

	default:
	isKnown = false
	unknown = append(unknown, line)
	cache.boringLines[line] = false
	matcher.pos = nextLinePos
	}

	// Clear unknown lines on any known line.
	if isKnown {
	unknown = unknown[:0]
	}
	}

	// TODO: FullMessages for these.
	if len(fs) == 0 && strings.Contains(m, "no space left on device") {
	fs = append(fs, &Failure{
	Message: "build failed (no space left on device)",
	})
	}
	if len(fs) == 0 && coordinatorTimeout.MatchString(m) {
	// all.bash was killed by coordinator.
	fs = append(fs, &Failure{
	Message: "build failed (timed out)",
	})
	}
	if len(fs) == 0 && strings.Contains(m, "Failed to schedule") {
	// Test sharding failed.
	fs = append(fs, &Failure{
	Message: "build failed (failed to schedule)",
	})
	}
	if len(fs) == 0 && strings.Contains(m, "nosplit stack overflow") {
	fs = append(fs, &Failure{
	Message: "build failed (nosplit stack overflow)",
	})
	}

	// If the same (message, where) shows up in more than five
	// packages, it's probably a systemic issue, so collapse it
	// down to one failure with no package.
	type dedup struct {
	packages map[string]bool
	kept bool
	}
	msgDedup := map[Failure]*dedup{}
	failureMap := map[Failure]dedup{}
	maxCount := 0
	for _, f := range fs {
	key := Failure{
	Message: f.canonicalMessage(),
	Function: f.Function,
	File: f.File,
	Line: f.Line,
	}

	d := msgDedup[key]
	if d == nil {
	d = &dedup{packages: map[string]bool{}}
	msgDedup[key] = d
	}
	d.packages[f.Package] = true
	if len(d.packages) > maxCount {
	maxCount = len(d.packages)
	}
	failureMap[f] = d
	}
	if maxCount >= 5 {
	fsn := []*Failure{}
	for _, f := range fs {
	d := failureMap[f]
	if len(d.packages) < 5 {
	fsn = append(fsn, f)
	} else if !d.kept {
	d.kept = true
	f.Test, f.Package = "", ""
	fsn = append(fsn, f)
	}
	}
	fs = fsn
	}

	// Check if we even got as far as testing. Note that there was
	// a period when we didn't print the "testing" header, so as
	// long as we found failures, we don't care if we found the
	// header.
	if !testingStarted && len(fs) == 0 {
	fs = append(fs, &Failure{
	Message: "toolchain build failed",
	})
	}

	for _, f := range fs {
	f.OS, f.Arch = os, arch

	// Clean up package. For misc/cgo tests, this will be
	// something like
	// _/tmp/buildlet-scatch825855615/go/misc/cgo/test.
	if strings.HasPrefix(f.Package, "_/tmp/") {
	f.Package = strings.SplitN(f.Package, "/", 4)[3]
	}

	// Trim trailing newlines from FullMessage.
	f.FullMessage = strings.TrimRight(f.FullMessage, "\n")
	}
	return fs, nil
	}

	func atoi(s string) int {
	v, err := strconv.Atoi(s)
	if err != nil {
	panic("expected number, got " + s)
	}
	return v
	}

	// firstLine returns the first line from s, not including the line
	// terminator.
	func firstLine(s string) string {
	if i := strings.Index(s, "\n"); i >= 0 {
	return s[:i]
	}
	return s
	}

	var (
	tracebackStart = regexp.MustCompile(`^(goroutine [0-9]+.*:\|runtime stack:)\n`)
	tracebackEntry = regexp.MustCompile(`^` + tbEntry)
	)

	// consumeTraceback consumes a traceback from m.
	func consumeTraceback(m *matcher) string {
	// Find the beginning of the traceback.
	for !m.done() && !m.peek(tracebackStart) {
	m.line()
	}

	start := m.pos
	loop:
	for !m.done() {
	switch {
	case m.hasPrefix("\n") \|\| m.hasPrefix("\t") \|\|
	m.hasPrefix("goroutine ") \|\| m.hasPrefix("runtime stack:") \|\|
	m.hasPrefix("created by "):
	m.line()

	case m.consume(tracebackEntry):
	// Do nothing.

	default:
	break loop
	}
	}
	return m.str[start:m.pos]
	}

	var (
	// testFromTracebackRe matches a traceback entry from a
	// function named Test* in a file named *_test.go. It ignores
	// "created by" lines.
	testFromTracebackRe = regexp.MustCompile(`\.(Test[^(\n]+)\(.\n._test\.go`)

	panicWhereRe = regexp.MustCompile(`(?m:^)` + tbEntry)
	)

	// testFromTraceback attempts to return the test name from a
	// traceback.
	func testFromTraceback(tb string) string {
	s := testFromTracebackRe.FindStringSubmatch(tb)
	if s == nil {
	return ""
	}
	return s[1]
	}

	// panicWhere attempts to return the fully qualified name, source
	// file, and line number of the panicking function in traceback tb.
	func panicWhere(tb string) (fn string, file string, line int) {
	m := matcher{str: tb}
	for m.consume(panicWhereRe) {
	fn := m.groups[1]

	// Ignore functions involved in panic handling.
	if strings.HasPrefix(fn, "runtime.panic") \|\| fn == "runtime.throw" \|\| fn == "runtime.sigpanic" {
	continue
	}
	return fn, m.groups[2], atoi(m.groups[3])
	}
	return "", "", 0
	}